{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data retrieval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import os\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 429,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed_urls = ['https://inshorts.com/en/read/technology',\n",
    "             'https://inshorts.com/en/read/sports',\n",
    "             'https://inshorts.com/en/read/world']\n",
    "\n",
    "def build_dataset(seed_urls, timeout=30):\n",
    "    \"\"\"Scrape headline/article pairs from every seed URL into one DataFrame.\n",
    "\n",
    "    Args:\n",
    "        seed_urls: iterable of category page URLs; the last path segment of\n",
    "            each URL becomes the news_category label.\n",
    "        timeout: seconds to wait for each HTTP response.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with the columns news_headline, news_article and\n",
    "        news_category (present even when nothing was scraped).\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: if a page answers with an error status.\n",
    "    \"\"\"\n",
    "    columns = ['news_headline', 'news_article', 'news_category']\n",
    "    news_data = []\n",
    "    for url in seed_urls:\n",
    "        # rstrip so a trailing slash does not produce an empty category\n",
    "        news_category = url.rstrip('/').split('/')[-1]\n",
    "        response = requests.get(url, timeout=timeout)\n",
    "        # fail loudly instead of silently parsing an error page\n",
    "        response.raise_for_status()\n",
    "        soup = BeautifulSoup(response.content, 'html.parser')\n",
    "\n",
    "        title_boxes = soup.find_all('div',\n",
    "                                    class_=[\"news-card-title news-right-box\"])\n",
    "        content_boxes = soup.find_all('div',\n",
    "                                      class_=[\"news-card-content news-right-box\"])\n",
    "        for title_box, content_box in zip(title_boxes, content_boxes):\n",
    "            headline = title_box.find('span', attrs={\"itemprop\": \"headline\"})\n",
    "            article = content_box.find('div', attrs={\"itemprop\": \"articleBody\"})\n",
    "            # skip cards missing either element rather than crashing on None\n",
    "            if headline is None or article is None:\n",
    "                continue\n",
    "            news_data.append({'news_headline': headline.string,\n",
    "                              'news_article': article.string,\n",
    "                              'news_category': news_category})\n",
    "\n",
    "    # passing columns keeps the schema even when news_data is empty\n",
    "    return pd.DataFrame(news_data, columns=columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 430,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>news_headline</th>\n",
       "      <th>news_article</th>\n",
       "      <th>news_category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>World's cheapest phone 'Freedom 251' maker's f...</td>\n",
       "      <td>The maker of world's cheapest smartphone 'Free...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>US unveils world's most powerful supercomputer...</td>\n",
       "      <td>The US has unveiled the world's most powerful ...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>FB bug changed 1.4 cr users’ privacy setting t...</td>\n",
       "      <td>Facebook has said it recently found a bug that...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Contest for 1st couple to marry in self-drivin...</td>\n",
       "      <td>The American Automobile Association has launch...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>China's ZTE to pay $1 billion fine to US to li...</td>\n",
       "      <td>Chinese telecommunications equipment maker ZTE...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Android Co-founder's startup unveils magnetic ...</td>\n",
       "      <td>Android Co-founder Andy Rubin's startup Essent...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Yahoo Messenger to shut down 20 years after la...</td>\n",
       "      <td>Yahoo has announced it is discontinuing its Me...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Google won't design AI for weapons, surveillan...</td>\n",
       "      <td>Google CEO Sundar Pichai has clarified the com...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Virgin Hyperloop One may allow riders to see t...</td>\n",
       "      <td>Richard Branson-led Virgin Hyperloop One has s...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Apple patents wearable device to monitor blood...</td>\n",
       "      <td>Apple has been granted the patent for a wearab...</td>\n",
       "      <td>technology</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       news_headline  \\\n",
       "0  World's cheapest phone 'Freedom 251' maker's f...   \n",
       "1  US unveils world's most powerful supercomputer...   \n",
       "2  FB bug changed 1.4 cr users’ privacy setting t...   \n",
       "3  Contest for 1st couple to marry in self-drivin...   \n",
       "4  China's ZTE to pay $1 billion fine to US to li...   \n",
       "5  Android Co-founder's startup unveils magnetic ...   \n",
       "6  Yahoo Messenger to shut down 20 years after la...   \n",
       "7  Google won't design AI for weapons, surveillan...   \n",
       "8  Virgin Hyperloop One may allow riders to see t...   \n",
       "9  Apple patents wearable device to monitor blood...   \n",
       "\n",
       "                                        news_article news_category  \n",
       "0  The maker of world's cheapest smartphone 'Free...    technology  \n",
       "1  The US has unveiled the world's most powerful ...    technology  \n",
       "2  Facebook has said it recently found a bug that...    technology  \n",
       "3  The American Automobile Association has launch...    technology  \n",
       "4  Chinese telecommunications equipment maker ZTE...    technology  \n",
       "5  Android Co-founder Andy Rubin's startup Essent...    technology  \n",
       "6  Yahoo has announced it is discontinuing its Me...    technology  \n",
       "7  Google CEO Sundar Pichai has clarified the com...    technology  \n",
       "8  Richard Branson-led Virgin Hyperloop One has s...    technology  \n",
       "9  Apple has been granted the patent for a wearab...    technology  "
      ]
     },
     "execution_count": 430,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "news_df = build_dataset(seed_urls)\n",
    "news_df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 431,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "world         25\n",
       "sports        25\n",
       "technology    24\n",
       "Name: news_category, dtype: int64"
      ]
     },
     "execution_count": 431,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "news_df.news_category.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text Wrangling and Pre-processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spacy\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import nltk\n",
    "from nltk.tokenize.toktok import ToktokTokenizer\n",
    "import re\n",
    "from bs4 import BeautifulSoup\n",
    "from contractions import CONTRACTION_MAP\n",
    "import unicodedata\n",
    "\n",
    "nlp = spacy.load('en_core', parse = True, tag=True, entity=True)\n",
    "#nlp_vec = spacy.load('en_vecs', parse = True, tag=True, entity=True)\n",
    "tokenizer = ToktokTokenizer()\n",
    "stopword_list = nltk.corpus.stopwords.words('english')\n",
    "stopword_list.remove('no')\n",
    "stopword_list.remove('not')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Remove HTML tags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Some important text'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def strip_html_tags(text):\n",
    "    \"\"\"Parse `text` as HTML and return only its text content.\"\"\"\n",
    "    return BeautifulSoup(text, \"html.parser\").get_text()\n",
    "\n",
    "strip_html_tags('<html><h2>Some important text</h2></html>')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Remove accented characters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Some Accented text'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def remove_accented_chars(text):\n",
    "    \"\"\"Return `text` NFKD-decomposed with every non-ASCII code point dropped.\"\"\"\n",
    "    decomposed = unicodedata.normalize('NFKD', text)\n",
    "    ascii_bytes = decomposed.encode('ascii', 'ignore')\n",
    "    return ascii_bytes.decode('utf-8', 'ignore')\n",
    "\n",
    "remove_accented_chars('Sómě Áccěntěd těxt')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Expand contractions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'You all cannot expand contractions I would think'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def expand_contractions(text, contraction_mapping=None):\n",
    "    \"\"\"Expand the contractions found in `text`.\n",
    "\n",
    "    Args:\n",
    "        text: string to process.\n",
    "        contraction_mapping: dict of contraction -> expansion; CONTRACTION_MAP\n",
    "            is used when it is omitted.\n",
    "\n",
    "    Returns:\n",
    "        `text` with every known contraction expanded (the first character of\n",
    "        each match keeps its original case) and all remaining apostrophes\n",
    "        removed.\n",
    "    \"\"\"\n",
    "    if contraction_mapping is None:\n",
    "        contraction_mapping = CONTRACTION_MAP\n",
    "    # Matching ignores case, so expansions must be reachable by lower-cased key;\n",
    "    # otherwise \"i'd\" matches the key \"I'd\" but has no entry and crashes.\n",
    "    lowered_mapping = {key.lower(): value for key, value in contraction_mapping.items()}\n",
    "\n",
    "    def expand_match(contraction):\n",
    "        match = contraction.group(0)\n",
    "        expanded_contraction = (contraction_mapping.get(match)\n",
    "                                or lowered_mapping.get(match.lower()))\n",
    "        if not expanded_contraction:\n",
    "            # nothing usable to substitute: leave the matched text untouched\n",
    "            return match\n",
    "        # keep the case of the original first character\n",
    "        return match[0] + expanded_contraction[1:]\n",
    "\n",
    "    expanded_text = text\n",
    "    if contraction_mapping:\n",
    "        # Longest keys first so \"I'd've\" is tried before its prefix \"I'd\";\n",
    "        # escaping makes every key match literally.\n",
    "        keys = sorted(contraction_mapping, key=len, reverse=True)\n",
    "        contractions_pattern = re.compile('({})'.format('|'.join(map(re.escape, keys))),\n",
    "                                          flags=re.IGNORECASE|re.DOTALL)\n",
    "        expanded_text = contractions_pattern.sub(expand_match, text)\n",
    "    return re.sub(\"'\", \"\", expanded_text)\n",
    "\n",
    "expand_contractions(\"Y'all can't expand contractions I'd think\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Remove special characters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Well this was fun What do you think '"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def remove_special_characters(text, remove_digits=False):\n",
    "    \"\"\"Delete every character of `text` that is not a letter, digit or whitespace.\n",
    "\n",
    "    With remove_digits=True the digits are deleted as well.\n",
    "    \"\"\"\n",
    "    # A-Z, not A-z: the range A-z also covers punctuation such as [ ] ^ _ and the backtick\n",
    "    pattern = r'[^a-zA-Z\\s]' if remove_digits else r'[^a-zA-Z0-9\\s]'\n",
    "    return re.sub(pattern, '', text)\n",
    "\n",
    "remove_special_characters(\"Well this was fun! What do you think? 123#@!\", \n",
    "                          remove_digits=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Text lemmatization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'My system keep crash ! his crash yesterday , ours crash daily'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def lemmatize_text(text):\n",
    "    \"\"\"Run `text` through `nlp` and join the token lemmas with single spaces.\"\"\"\n",
    "    lemmas = []\n",
    "    for token in nlp(text):\n",
    "        # tokens whose lemma is the '-PRON-' placeholder keep their original text\n",
    "        lemmas.append(token.text if token.lemma_ == '-PRON-' else token.lemma_)\n",
    "    return ' '.join(lemmas)\n",
    "\n",
    "lemmatize_text(\"My system keeps crashing! his crashed yesterday, ours crashes daily\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Text stemming"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'My system keep crash hi crash yesterday, our crash daili'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def simple_stemmer(text):\n",
    "    \"\"\"Porter-stem each whitespace-separated word of `text` and re-join with spaces.\"\"\"\n",
    "    stemmer = nltk.porter.PorterStemmer()\n",
    "    stems = map(stemmer.stem, text.split())\n",
    "    return ' '.join(stems)\n",
    "\n",
    "simple_stemmer(\"My system keeps crashing his crashed yesterday, ours crashes daily\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Remove stopwords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "', , stopwords , computer not'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def remove_stopwords(text, is_lower_case=False):\n",
    "    \"\"\"Tokenize `text`, drop the tokens found in stopword_list, re-join with spaces.\n",
    "\n",
    "    When is_lower_case is False each token is lower-cased for the lookup only;\n",
    "    the surviving tokens keep their original case.\n",
    "    \"\"\"\n",
    "    kept_tokens = []\n",
    "    for raw_token in tokenizer.tokenize(text):\n",
    "        token = raw_token.strip()\n",
    "        lookup_form = token if is_lower_case else token.lower()\n",
    "        if lookup_form not in stopword_list:\n",
    "            kept_tokens.append(token)\n",
    "    return ' '.join(kept_tokens)\n",
    "\n",
    "remove_stopwords(\"The, and, if are stopwords, computer is not\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Building a text normalizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalize_corpus(corpus, html_stripping=True, contraction_expansion=True,\n",
    "                     accented_char_removal=True, text_lower_case=True,\n",
    "                     text_lemmatization=True, special_char_removal=True,\n",
    "                     stopword_removal=True, remove_digits=True):\n",
    "    \"\"\"Run every document of `corpus` through the enabled cleaning steps.\n",
    "\n",
    "    Steps, in order: HTML stripping, accent removal, contraction expansion,\n",
    "    lower-casing, newline collapsing, lemmatization, special character removal,\n",
    "    whitespace squeezing and stopword removal. Each boolean flag switches the\n",
    "    step of the same name on or off; remove_digits is forwarded to\n",
    "    remove_special_characters. Newline collapsing and whitespace squeezing\n",
    "    always run.\n",
    "\n",
    "    Returns:\n",
    "        A list with one normalized string per input document.\n",
    "    \"\"\"\n",
    "    # compiled once up front: none of these depend on the document\n",
    "    # (no '|' inside the newline class, so literal pipes are left alone)\n",
    "    newline_pattern = re.compile(r'[\\r\\n]+')\n",
    "    special_char_pattern = re.compile(r'([{.(-)!}])')\n",
    "    whitespace_pattern = re.compile(' +')\n",
    "\n",
    "    normalized_corpus = []\n",
    "    # normalize each document in the corpus\n",
    "    for doc in corpus:\n",
    "        # strip HTML\n",
    "        if html_stripping:\n",
    "            doc = strip_html_tags(doc)\n",
    "        # remove accented characters\n",
    "        if accented_char_removal:\n",
    "            doc = remove_accented_chars(doc)\n",
    "        # expand contractions (before lower-casing, so the case of matches is kept)\n",
    "        if contraction_expansion:\n",
    "            doc = expand_contractions(doc)\n",
    "        # lowercase the text\n",
    "        if text_lower_case:\n",
    "            doc = doc.lower()\n",
    "        # replace runs of newlines / carriage returns with one space\n",
    "        doc = newline_pattern.sub(' ', doc)\n",
    "        # lemmatize text\n",
    "        if text_lemmatization:\n",
    "            doc = lemmatize_text(doc)\n",
    "        # remove special characters and/or digits\n",
    "        if special_char_removal:\n",
    "            # insert spaces around special characters to isolate them first\n",
    "            doc = special_char_pattern.sub(\" \\\\1 \", doc)\n",
    "            doc = remove_special_characters(doc, remove_digits=remove_digits)\n",
    "        # squeeze repeated spaces\n",
    "        doc = whitespace_pattern.sub(' ', doc)\n",
    "        # remove stopwords\n",
    "        if stopword_removal:\n",
    "            doc = remove_stopwords(doc, is_lower_case=text_lower_case)\n",
    "\n",
    "        normalized_corpus.append(doc)\n",
    "\n",
    "    return normalized_corpus"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pre-process and normalize news articles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fillna/astype so a missing headline or article cannot turn the whole row into NaN\n",
    "news_df['full_text'] = (news_df[\"news_headline\"].fillna('').astype(str) + '. '\n",
    "                        + news_df[\"news_article\"].fillna('').astype(str))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 442,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'clean_text': 'us unveils world powerful supercomputer beat china us unveil world powerful supercomputer call summit beat previous record holder china sunway taihulight peak performance trillion calculation per second twice fast sunway taihulight capable trillion calculation per second summit server reportedly take size two tennis court',\n",
       " 'full_text': \"US unveils world's most powerful supercomputer, beats China. The US has unveiled the world's most powerful supercomputer called 'Summit', beating the previous record-holder China's Sunway TaihuLight. With a peak performance of 200,000 trillion calculations per second, it is over twice as fast as Sunway TaihuLight, which is capable of 93,000 trillion calculations per second. Summit has 4,608 servers, which reportedly take up the size of two tennis courts.\"}"
      ]
     },
     "execution_count": 442,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "news_df['clean_text'] = normalize_corpus(news_df['full_text'])\n",
    "norm_corpus = list(news_df['clean_text'])\n",
    "news_df.iloc[1][['full_text', 'clean_text']].to_dict()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Save the news articles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 443,
   "metadata": {},
   "outputs": [],
   "source": [
    "news_df.to_csv('news.csv', index=False, encoding='utf-8')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tagging Parts of Speech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "news_df = pd.read_csv('news.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "corpus = normalize_corpus(news_df['full_text'], text_lower_case=False, \n",
    "                          text_lemmatization=False, special_char_removal=False)\n",
    "\n",
    "sentence = str(news_df.iloc[1].news_headline)\n",
    "sentence_nlp = nlp(sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Word</th>\n",
       "      <th>POS tag</th>\n",
       "      <th>Tag type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>US</td>\n",
       "      <td>NNP</td>\n",
       "      <td>PROPN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>unveils</td>\n",
       "      <td>VBZ</td>\n",
       "      <td>VERB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>world</td>\n",
       "      <td>NN</td>\n",
       "      <td>NOUN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>'s</td>\n",
       "      <td>POS</td>\n",
       "      <td>PART</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>most</td>\n",
       "      <td>RBS</td>\n",
       "      <td>ADV</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>powerful</td>\n",
       "      <td>JJ</td>\n",
       "      <td>ADJ</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>supercomputer</td>\n",
       "      <td>NN</td>\n",
       "      <td>NOUN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>,</td>\n",
       "      <td>,</td>\n",
       "      <td>PUNCT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>beats</td>\n",
       "      <td>VBZ</td>\n",
       "      <td>VERB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>China</td>\n",
       "      <td>NNP</td>\n",
       "      <td>PROPN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Word POS tag Tag type\n",
       "0             US     NNP    PROPN\n",
       "1        unveils     VBZ     VERB\n",
       "2          world      NN     NOUN\n",
       "3             's     POS     PART\n",
       "4           most     RBS      ADV\n",
       "5       powerful      JJ      ADJ\n",
       "6  supercomputer      NN     NOUN\n",
       "7              ,       ,    PUNCT\n",
       "8          beats     VBZ     VERB\n",
       "9          China     NNP    PROPN"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# store the token text rather than the Token object so the frame holds plain strings\n",
    "spacy_pos_tagged = [(word.text, word.tag_, word.pos_) for word in sentence_nlp]\n",
    "pd.DataFrame(spacy_pos_tagged, columns=['Word', 'POS tag', 'Tag type'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Word</th>\n",
       "      <th>POS tag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>US</td>\n",
       "      <td>NNP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>unveils</td>\n",
       "      <td>VBZ</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>world's</td>\n",
       "      <td>VBZ</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>most</td>\n",
       "      <td>RBS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>powerful</td>\n",
       "      <td>JJ</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>supercomputer,</td>\n",
       "      <td>JJ</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>beats</td>\n",
       "      <td>NNS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>China</td>\n",
       "      <td>NNP</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Word POS tag\n",
       "0              US     NNP\n",
       "1         unveils     VBZ\n",
       "2         world's     VBZ\n",
       "3            most     RBS\n",
       "4        powerful      JJ\n",
       "5  supercomputer,      JJ\n",
       "6           beats     NNS\n",
       "7           China     NNP"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nltk_pos_tagged = nltk.pos_tag(sentence.split())\n",
    "pd.DataFrame(nltk_pos_tagged, columns=['Word', 'POS tag'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Shallow Parsing or Chunking Text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10900 48\n",
      "(S\n",
      "  Chancellor/NNP\n",
      "  (PP of/IN)\n",
      "  (NP the/DT Exchequer/NNP)\n",
      "  (NP Nigel/NNP Lawson/NNP)\n",
      "  (NP 's/POS restated/VBN commitment/NN)\n",
      "  (PP to/TO)\n",
      "  (NP a/DT firm/NN monetary/JJ policy/NN)\n",
      "  (VP has/VBZ helped/VBN to/TO prevent/VB)\n",
      "  (NP a/DT freefall/NN)\n",
      "  (PP in/IN)\n",
      "  (NP sterling/NN)\n",
      "  (PP over/IN)\n",
      "  (NP the/DT past/JJ week/NN)\n",
      "  ./.)\n"
     ]
    }
   ],
   "source": [
    "from nltk.corpus import conll2000\n",
    "data = conll2000.chunked_sents()\n",
    "\n",
    "train_data = data[:10900]\n",
    "test_data = data[10900:] \n",
    "\n",
    "print(len(train_data), len(test_data))\n",
    "print(train_data[1]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Chancellor', 'NNP', 'O'),\n",
       " ('of', 'IN', 'B-PP'),\n",
       " ('the', 'DT', 'B-NP'),\n",
       " ('Exchequer', 'NNP', 'I-NP'),\n",
       " ('Nigel', 'NNP', 'B-NP'),\n",
       " ('Lawson', 'NNP', 'I-NP'),\n",
       " (\"'s\", 'POS', 'B-NP'),\n",
       " ('restated', 'VBN', 'I-NP'),\n",
       " ('commitment', 'NN', 'I-NP'),\n",
       " ('to', 'TO', 'B-PP'),\n",
       " ('a', 'DT', 'B-NP'),\n",
       " ('firm', 'NN', 'I-NP'),\n",
       " ('monetary', 'JJ', 'I-NP'),\n",
       " ('policy', 'NN', 'I-NP'),\n",
       " ('has', 'VBZ', 'B-VP'),\n",
       " ('helped', 'VBN', 'I-VP'),\n",
       " ('to', 'TO', 'I-VP'),\n",
       " ('prevent', 'VB', 'I-VP'),\n",
       " ('a', 'DT', 'B-NP'),\n",
       " ('freefall', 'NN', 'I-NP'),\n",
       " ('in', 'IN', 'B-PP'),\n",
       " ('sterling', 'NN', 'B-NP'),\n",
       " ('over', 'IN', 'B-PP'),\n",
       " ('the', 'DT', 'B-NP'),\n",
       " ('past', 'JJ', 'I-NP'),\n",
       " ('week', 'NN', 'I-NP'),\n",
       " ('.', '.', 'O')]"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from nltk.chunk.util import tree2conlltags, conlltags2tree\n",
    "\n",
    "wtc = tree2conlltags(train_data[1])\n",
    "wtc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(S\n",
      "  Chancellor/NNP\n",
      "  (PP of/IN)\n",
      "  (NP the/DT Exchequer/NNP)\n",
      "  (NP Nigel/NNP Lawson/NNP)\n",
      "  (NP 's/POS restated/VBN commitment/NN)\n",
      "  (PP to/TO)\n",
      "  (NP a/DT firm/NN monetary/JJ policy/NN)\n",
      "  (VP has/VBZ helped/VBN to/TO prevent/VB)\n",
      "  (NP a/DT freefall/NN)\n",
      "  (PP in/IN)\n",
      "  (NP sterling/NN)\n",
      "  (PP over/IN)\n",
      "  (NP the/DT past/JJ week/NN)\n",
      "  ./.)\n"
     ]
    }
   ],
   "source": [
    "tree = conlltags2tree(wtc) \n",
    "print(tree)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "def conll_tag_chunks(chunk_sents):\n",
    "    \"\"\"Convert chunk trees to per-sentence lists of (POS tag, chunk tag) pairs.\n",
    "\n",
    "    Each tree goes through tree2conlltags and the word of every resulting\n",
    "    (word, POS tag, chunk tag) triple is dropped.\n",
    "    \"\"\"\n",
    "    pos_chunk_sents = []\n",
    "    for chunk_tree in chunk_sents:\n",
    "        triples = tree2conlltags(chunk_tree)\n",
    "        pos_chunk_sents.append([(pos_tag, chunk_tag) for _, pos_tag, chunk_tag in triples])\n",
    "    return pos_chunk_sents\n",
    "\n",
    "\n",
    "def combined_tagger(train_data, taggers, backoff=None):\n",
    "    \"\"\"Build a backoff chain of taggers and return its last link.\n",
    "\n",
    "    Each class in `taggers` is instantiated on `train_data` with the previously\n",
    "    built tagger (initially `backoff`) as its backoff. With an empty `taggers`\n",
    "    the given `backoff` is returned unchanged.\n",
    "    \"\"\"\n",
    "    chain = backoff\n",
    "    for tagger_cls in taggers:\n",
    "        chain = tagger_cls(train_data, backoff=chain)\n",
    "    return chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.tag import UnigramTagger, BigramTagger\n",
    "from nltk.chunk import ChunkParserI\n",
    "\n",
    "class NGramTagChunker(ChunkParserI):\n",
    "    \"\"\"Chunk parser that predicts chunk tags from POS tags with n-gram taggers.\"\"\"\n",
    "\n",
    "    def __init__(self, train_sentences,\n",
    "                 tagger_classes=[UnigramTagger, BigramTagger]):\n",
    "        \"\"\"Train the backoff chain of `tagger_classes` on the given chunk trees.\"\"\"\n",
    "        # the taggers learn from (POS tag, chunk tag) pairs, not from the words\n",
    "        pos_chunk_pairs = conll_tag_chunks(train_sentences)\n",
    "        self.chunk_tagger = combined_tagger(pos_chunk_pairs, tagger_classes)\n",
    "\n",
    "    def parse(self, tagged_sentence):\n",
    "        \"\"\"Return the chunk tree for a list of (word, POS tag) pairs.\n",
    "\n",
    "        Returns None when `tagged_sentence` is empty.\n",
    "        \"\"\"\n",
    "        if not tagged_sentence:\n",
    "            return None\n",
    "        pos_sequence = [pos for _, pos in tagged_sentence]\n",
    "        chunk_sequence = [chunk for _, chunk in self.chunk_tagger.tag(pos_sequence)]\n",
    "        # re-attach the words so the triples can be turned back into a tree\n",
    "        triples = []\n",
    "        for (token, pos), chunk in zip(tagged_sentence, chunk_sequence):\n",
    "            triples.append((token, pos, chunk))\n",
    "        return conlltags2tree(triples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ChunkParse score:\n",
      "    IOB Accuracy:  90.0%%\n",
      "    Precision:     82.1%%\n",
      "    Recall:        86.3%%\n",
      "    F-Measure:     84.1%%\n"
     ]
    }
   ],
   "source": [
    "ntc = NGramTagChunker(train_data)\n",
    "print(ntc.evaluate(test_data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(S\n",
      "  (NP US/NNP)\n",
      "  (VP unveils/VBZ world's/VBZ)\n",
      "  (NP most/RBS powerful/JJ supercomputer,/JJ beats/NNS China/NNP))\n"
     ]
    }
   ],
   "source": [
    "chunk_tree = ntc.parse(nltk_pos_tagged)\n",
    "print(chunk_tree)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAsoAAABiCAIAAADGJGK9AAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAHXRFWHRTb2Z0d2FyZQBHUEwgR2hvc3RzY3JpcHQgOS4wOfoZEaQAABVKSURBVHic7d1BjOPWeQfwb+1d1zuynRLeGdcGgpmy2AYe5eCC0SmH3QJMDxugp9D3vbBAzgmo4+YmITkGBoYIkNwKDN2jAxTDw0wBHwwNe8qocINhNG0Ru6P10Fuv5LHXm+nh231g+EiKkp5ISvr/sFhoOBL5+ImP7+N7j5xrV1dXBAAAAKDOC1UXAAAAAFYN0gsAAABQDOkFAAAAKIb0AgAAABRDegEAAACKIb0AAAAAxa5XXQCAZzzPC4LAsixN03Rdr7o4AAAwO/ReQC10u11d1zudThAErutWXRwAAJgL0guohSiKDMMgItu2TdOsujgAADCXa3hqJ9RBEAS+70dRZJom0gsAgGWH9ALqxff9IAgcx6m6IAAAMDsMjkAtdLtdfmGaZhRF1RYGAADmhDtHoBZ83yciwzB837csq+riAADAXDA4AnURRVEQBJh4AQCwApBeAAAAgGKYewEAAACKIb0AAAAAxZBeAAAAgGJIL6BG/vmjj4Kzs6pLAQAA88KNqVCeaDwOBgMiCgaDaDzmF9/86U/Hf/jD48tLInr5xo3LJ0/E+42dHa3RICJtY0Pf2nq2cHubF+qbm/rmZuk7AQAAk+HOEVApHA7D4ZCI/H6fiKLRiH8MBoNoNEq8+fYbb1yMRp89fkxEf7mx8f3bt3ffeusfvvtd8fH4GojIPznJ2q7ZbPILfXOTkw8iMnd3+YWxs6NtbKjZQwAAKADpBUwnODuLRqNoNOJRDNH8y22/1mgYOztExP9rGxvfefPNf/v4408fPfooDMPzcyKyWi1jZ8dsNo3t7WnLwFsXgynh+Tn3iESjEfeRyESRRKm4YOK1yEgAAGAeSC8gSXQ8cMvNzXZqm61vbfHwhEggRD4R7y0Izs68Xs8/OeE16FtbVqtlbG9brVY5+0KxbhUiEjsSDoec5cjErlEsERHjMlqjMVU+BACwbpBerCNudEWLy81talsrZj9wEyumO0y8yo/GY6/XCwYDr9fjngbuqLBarRpOmBCTQig2L4RiiUjq4A5LnSAi4oMJIgCwnpBerKDUGZSU0UZy6yiaRr5An/nqvMKOinLEe0HkCSI54zKUNkEkPi6DCSIAsEqQXiylqWZQcqsmmjS+sFZ4VR0Oh36/75+c+P1+NBppjYa5u2s2m+bu7jpfuOdPEMkZl0mdIBL/vjBBBADqD+lFTc0zgzIxkLEgfr/PWQVfr/MMTXN3F43ftPIniOSMy4gJIqk37mKCCABUCOlFZZTPoCwBOioqNHGCSM6Nu2KCSOqNu5ggAgDKIb1YoBJmUJYDHRXLJXWCSJEbdyk2QST1xl1MEAGAgpBezK7CGZQlCIdDcesHEWmNhrj1Aw3MykidIFLkxl1MEAGAfEgv8tRqBmU5vF6P7/7gdoU7Kvjuj6qLBlWaeYJI/pPd65xhA8A81j29qP8MyhKgowJUSZ0gouTJ7itQ0QDWyuqnF8s4g7IE0Xjsn5zEOyrMZpNTClxNQglEL4jyJ7uvZIUFWDqrkF6szAzKEgRnZ3zfB19E6ltbPEPTbDZxRoZ6EhNElDzZXdR6jMsALNQSpBerPYOyBNxRwXd/iI4KTinWOSywesQEESVPdhcTRDAuAzCDWqQXaziDsgToqABIFZ8gIj/ZneaYIIJxGQChpPQCMyjLIf6QmOiomO0vngMATXqy+zwTRNZnQBbWlvr0gu9sxAzKkrlHR+7h4ar+ITGAmsu/cXeqCSL2nTu4lIIVcF35GrlP/tnIRbNpNptrOIOyfOFwqG9uWq1WPf/iOcBqK3hyy3+yezAYBIMBHrEPq6EWcy8AAABglbxQdQEAAABg1SC9AAAAAMWQXgAAAIBi
807tDIIgiiLTNInI930iMgwjiqIwDPkNuq7ruj7nViCBQ82x5a9A0zRN0xB2gOUin0Jv3br18OFD/i0qMiyveXsvDMNwXZdrhWmavu9rmsb1wTRNXdc9z/M8T0FJIUbX9TAMOc6GYfi+bxgGwg6wdORT6DvvvEOoyLD8FAyO6LrOdSP1V47jiEtqUIXTC34dRVHi+gZhB1giWadQVGRYamrmXti23e125eVRFHW7XXTuLYLIMDzPsywr/iuEHWC5pJ5CUZFhqal5rJau6/GBfyIKw7Ddbuu6blkWqsci2Lbdbrc7nQ5PvOCFCDvAMkqcQlGRYQUoe2ont3biR13XbdtWtXLIEgSBYRjiR4QdYEnFT6GoyLACXnzw4ME8n/d93/O8y8tLbuR837csixd++OGHYRjGGz9QS9O0drstvkGEHWDpyKdQTdNQkWEF4KHgAAAAoBgeqwUAAACKIb0AAAAAxZBeAAAAgGJILwAAAEAxpBcAAACgmLLnXkCF3KOjjz/55DtvvmnfuVN1WQBgCsHZWTQa/c/Fxb/+7nefPnr0+Xj8+Kuvbr3yyl9961vffv31f3znHSIydna0jY2qSwowHfU3pl67f//gpz81d3fVrhZk4XDoHh25h4fRaPT2W2/9xx//qDUa9t279p07+uZm1aUDACIiv98nonA4DIdDIgrPz6Px+L8+++w/P/1UfvNf3Ljx1ZMnN1588cnTp/Jvv3/79s2XXtI3N7VGg4iM7W2t0dAaDWN7e8E7ATA19F4sJa/Xc4+O/JMTrdGwWi377l1jezs4O3MPD93Dw+4HH5jNptVqoTMDYNGi8TgYDIgoGAyi8ZhfEOcT5+eJN7928+Y3T5+Ov/6af9x89dVvv/76377xxt+//baxs2Nsb7fff7/7wQdf/+pXnI78y/Hxf19cfPLo0b8PBkT04e9/T0Svv/LKk6dP/+/LLxMr17e2+LrC2NkhIn1zU/yIzg8oH3ovlgl3V3i9Xnh+rm9tOffuWa1W4sQRjcder+ceHgaDATozAOYnOh64HyIajfjHYDCIRqPEm81m84vLy1dffvnx5eWnjx59+eTJ/z56xL/i5t/Y2eGGP/Uk6fV677733tWvf51VDL/fD8/Pw+GQkxgievXmzb++dWvjpZdevnHj6urq2rVrX1xeit8KWqMh0g7u/OACoPMDFgTpxXLw+31OGojIvnvXarUmRpg7M7xeLxqNzGbTvnPHarXKKCvAsuEJENFoFJyd0fPxCyLyT04S7xSNNP+vbWy8dvPmJ59/Pvzii88eP463+vxO7jkQLyaWxO/3f/Dzn6emF6nFDs/P+f/UTb928+aNF174u+1tTozoec9KamJk7OxojYa2saFvbVGs8wMnc5gN0otai8Zj9/DQPTri7gqr1XLu3Zuqn1NeAzozYA0lJkBwKxuNRvJVfv4QQ1YXwmzJRGo5i6cXqR/nEgaDQXx0JqvjpHhYEnkVPU87RHwAEpBe1FS878FqtfjfPCuM938oWSFAfWRNgChymc4TJCntMl0kEzwgEu/MMJtNHmUwd3cVNrGcXhw/eKBqwCKecMSj8SzV2Noytrf5f/mz3KlDhUeFiKhIVGFNIL2ol0XPnEBnBiyprAkQ8vgFPW/qpp1kwDkKJyjBYBBf87MOCdXJhIzTi8WdQhP7mJpwFNzHqea0JvqEuI+H0Pmx0pBe1EX8LtMS7vuYYTIHwEKlToAo0lEv2qqphiSKNLQ5V/YLsuj0Qra4HprEFzpVRoh7blcA0ovq8c0gibtMy9l04lYU+84d++5d3MMGiyAudhMj/UUudpVMM8waJoinESUnE6mFLDm9kC16fkl8KzRf5wfuua0zpBeViTftxs4OdyFUVVW8Xo//ETozYFZZDUbxCRAKr1anneRYE8HZ2fcePKjbKXTiLSqLCKYYBSt+Ow/uua0VpBcV8Pt9nrZJNWvL0ZkB+bLm+hU/4y9orL2S9m9BluIUWnn2lpiLQwVyWVLa
GQYTIb0oT2JaZZ0b79SnglZdKChD6iVjkRs4y+yvnphMKOm9r8qSnkLnuUVlQeUh6VnsuOe2TEgvyiAe101LdVNofLZp5cM3oMRUs/1rcs03ce4hXy4vaTIhu3b//v6Pf7wUp4gcCm9RWYSp7rktficzxCG9WKCVeT53hZNPYVpTdRqnPqug2hHrZzeFlvioibq5dv++88Mfdn70o6oLolj5DxGZ2TzTkHHPrYD0YiHC4bD729+KB3Kvxl8XkzszVmCnllHx+/3q3+tb8O5Qfl1tUUuzqumFrJxbVBZhhntua5XElwPphWKJC33n3r3VOy1ykrHsXTJ1VnzYeOlu2PP7/bo9aqJu1ie9kK3GFF0lDxyrbRUuDumFGms4TSHxJ9NWo4emHDNMgFjSPzRV5P4C/kPk1ZazVtY5vZBVfovKIhS/55ZmfQRtHSC9mFfiiRHrNi9hZeaXqDXtn/CmlXheobj0XKWWoHzf+9nPzGYT6UWWut2iotwMz4+hjL/AV2q5JerTix/84hcdy1rer3Yq7tHRP/3mNzW/y7QcojODiC5++cuqi1MZr9d797334kuynmBdnwkQSvyN43A+Uf+B85prv/++vrmJvsCCsqbvmM3mwU9+UnXpFqL44OnMf3pXCfXpxbrx+31cjQlcw9c5IDxVrT4XEKXxej1OLNZnl6GeuN3lvzVddVkqEO86rbYPDOkFAAAAKPZC1QUAAACAVYP0AgAAABRDegEAAACKXU9dGoZhGIaaphmG4fs+ERmGoWkaEXmeFwSBZVmapum6nvrxIAiiKDJNk4j449evX//mm2/iSwzDiKIoDEP+iK7rWWuriXa73el0xI9BEHieZxiGZVlTrYd3n/eXA6VpmqZpIhQcdnoeRl4o4l+JnJ3l40FEJgiCdrt9cHAw1fqLx4TqFBYi4qJOfBtXKH4tDvX4wvgOFqlitRJFUbfb5dfxOpJQvMpMDFf9TxcLVfCom58cc7XfQuLsMa154lCwkSre6s2D12+aJm/R9/0gCBzHSX1zoiWamRwBwzDCMEwsvHXr1sOHD/kjU3/jVxkODg7Ea8dx+EWn0zk+Pr66utrb2xMLU1mWJdbA75SXiK2cnp52Op39/f2cFVbu9PRUXhiPUvH17O3tiR8Tobi4uNjf3+dQxMOeuvWSZe1s/Eg4Pj7OOaiyFI/JVc3CUvwASD3U5R0sXsXqo9PpXFxcFHzzVBGT35y6cN2UHAE55gq/hXkO8jnLULCRWnSVdByHN3pwcGDbNi/M2TWFJ73U/ZUXztxMp/deZImiiK+xbNvm1CaLruu+73MSlLUk/ivHccQF0DxE3heGoeu6uq7bth2GYbfb1XWdL4gdx/F9nwtjWZbrumEYmqZpmiZfYOm6HkWRbducuvKbDcPIT9zExRmvTVyMyjsr0v8oihLr1DTNsiwOBa+B3zxPysypsW3b3Dfguq5lWWJnNU2LoshxHN5ZjoZlWZ7nUfbFqPhsomCGYSS+4iJhKR4TUhcW1u12E50lvMupwUnsSxRFnueJrhfbtiduLvVQj+9g8So2EX+VXPIoiizLMgxD3q/8wyNRF+TDw3Vd3/e5P4krUWodnGdHlJCPw9RypgZNfDwejdSawh05fDxrmsYXoHLM5aNOfCS+3dQS+r6fetQVLGENiSOfD0JK2xd6fh7mQHU6nfw4TDwPU+FGSmGVlIVhqOs6b9E0zfgJzXVdfoM4/8gtkdy0cedHPFA5W0/d36yWepZmOivvSO29OD4+7nQ6ItvK4TgOJzvi4/KSq9jVm8Lei9SS89bFtuK/EpfO4leJ1/Jqs7bF13Cnp6ec7WbZ29vjwuzt7YnLPk6NHcexLCuenyrJl3klvJvxmPCLi4uL+FZs2079LsTOxoNzfHxsWVbq28RWioRlqphcKQoLl5bDwtcN+cGR96X49dP+/v7BwcH+/r7jOCIO8g4Wr2JF2LYtgimqYep+Xf354ZFTF+TDQ/4uUuug/Kt8ansv
Uo/D1HLKQcuKRn4oco6l1KNO3m5WCeUIFC/hDBbae2FZVsFoC/v7+zm1r/h5uGAjpbZKJmRFUvQIJnp2r9JOsImmTRCBypK6v/LCmZvp6XovDMPgPM73/W63mzU4JJKd+PB56pIwDNvttq7rlmUtejCV18/XEETEVyr8v7hg5fLMtn7HcVzX5YuS/Ms127Z5/Cw+digScL4A4iJ1u12xcJ7RVnGRKtYTz08Ta+aL6Zy1BUEgrgnkTp1E2lswLMVjQurCIrZCfx6ErOAU/4rziejJOzhVFZuIr4/F66z9kg+PnLow8fCYE9fKIgunUvy7SwSNsqORHwo+XLNiLh918naLm62E+eSYz/8tyLhR4Nf50RbXzfm1fqpKWqSRUlslCxLTPuI9u1kSTVvBQInPJvZXXjhzM52ZXqTWChFc7gKduHZuM3KWLKLjVMz7Ey9S8deWaMwMw5i5PNw3RVJbmCXeSMeJswx3cInewtRxpeK4l7Xb7fJGDcPodruzrZN7z/h1fH5uqqnCMjEmpDosqbKCM+1XHKdpGq9Q13X5NC12cNoqli+KInGEcx9v1peeODzmrAsF62Cq1GGynLGz4quVv7vUciaCRrNGg1dSvKLJ280qoWyqEvIZeOLRK8d8/m8hX060gyAQ6QKP9eQUe6pKOrGRUlslEwzDcF1XHB5ZJ8DiigdKkCOQWDhzM52ZXvBgMI//iXxNzC/1fT8nKeYhQz6HmqbJo7OJJfw2HjxTnmTw+qMoCoKAiIIgEFt3XZcXEpFpmu++++7+/j7/yCkb73X8S+IoR1EUL6q8kK//uOmdeIhYlhW/wyI+jkjP8zkebeXSzlmZeT2cUfG3ydMmxFcsvs12u813f1DsBJQaAdd1NU3jQGXNqqHnMz+KhKVITNSGhbfCM6VFnsTHSWpw5H3Rdb3dbvNMBc7us7bC+8L9E57n8WC8vIMFq1hBPADPZ20x713eL/nwyKkLicODKxQvEWPnJNVByjiKsspMRDxvIGfhVLKOQ7mciaBlRSO1pojwcldQp9NJjXnqUSdvN6uE8lFXvIS8QnHSyyHHfP5vIYFnh8T3JSvauq5zZeFbVzh6qbWv4AmnYCNFqqtkAl918F5wgXlDnueFYSgaLJ6iIdcguWlzHEcOVMEIZIVl9mY6Z+Dk4uIidZCv/jO3T09PZ55eO88OLkVw4uYp8MShTSVbqVDB41/53ilcYeoMleLrn/PwmLkOpn52nhWy1N2RV5s1rad4NA4ODhK30hT5bNZ2U3d85pPzwcFBYrJUDnnT838LWaUqErGCb6tzlcwi71p91jYz/M0RgNUUvw2k6rIsjaqCVtp2+a6KRU90AyD8STMAAABQDg8FBwAAAMWQXgAAAIBiSC8AAABAMaQXAAAAoBjSCwAAAFAM6QUAAAAo9v9DKMOhZ04sJAAAAABJRU5ErkJggg==",
      "text/plain": [
       "Tree('S', [Tree('NP', [('US', 'NNP')]), Tree('VP', [('unveils', 'VBZ'), (\"world's\", 'VBZ')]), Tree('NP', [('most', 'RBS'), ('powerful', 'JJ'), ('supercomputer,', 'JJ'), ('beats', 'NNS'), ('China', 'NNP')])])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display\n",
    "os.environ['PATH'] = os.environ['PATH']+\";C:\\\\Program Files\\\\gs\\\\gs9.09\\\\bin\\\\\"\n",
    "display(chunk_tree)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Constituency parsing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 446,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(ROOT\n",
      "  (SINV\n",
      "    (S\n",
      "      (NP (NNP US))\n",
      "      (VP\n",
      "        (VBZ unveils)\n",
      "        (NP\n",
      "          (NP (NN world) (POS 's))\n",
      "          (ADJP (RBS most) (JJ powerful))\n",
      "          (NN supercomputer))))\n",
      "    (, ,)\n",
      "    (VP (VBZ beats))\n",
      "    (NP (NNP China))))\n"
     ]
    }
   ],
   "source": [
    "# set java path\n",
    "import os\n",
    "java_path = r'C:\\Program Files\\Java\\jdk1.8.0_102\\bin\\java.exe'\n",
    "os.environ['JAVAHOME'] = java_path\n",
    "\n",
    "from nltk.parse.stanford import StanfordParser\n",
    "\n",
    "scp = StanfordParser(path_to_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser.jar',\n",
    "                   path_to_models_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser-3.5.2-models.jar')\n",
    "                   \n",
    "result = list(scp.raw_parse(sentence))\n",
    "print(result[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 447,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAisAAAEdCAIAAABCB7BSAAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAHXRFWHRTb2Z0d2FyZQBHUEwgR2hvc3RzY3JpcHQgOS4wOfoZEaQAACAASURBVHic7d1PbBzZfSfwJ82MFyS9k6lsSGc0jtn7BgkCdtZ/8sxgMRhgdChdJDgbYFU6SjMHlwAhgB145Kqb5VsVpEMsBBOzfMgourG0t5EufAcxgLIH6mGNLNgIkLDQDGILS9p8dnbYZoKsew8/6amm/5HsrupX3f39YDAoPTa7f91drG+9P119pt1uMwAAgLE7a7sAAACYUUggAACwAwkEAAB2IIEAAMAOJBAAANjxqu0CAGBqZVmWZRltc8455+ZHaZoqpTzPE0L0bNFaK6Xy9+a67rgKhzFBHwgAykKR47ou5zxN0zRNqT0MQ8dxoijKsixJkp4tFD+u60op6f/2ngeUBQkEAKXjnAdBQP0hpRTnnDo0nudlWdbdwhgTQpjuEWPM931LtUOJMAoHACXSWkspaUjN8zzGmJQyCAJzA9d1f/jDH/7oRz/Kt1C/J38/+RE8mBroAwHAmFCfxnEcMznEGNNaf+ELX+hocRzHQn0wdkggACiR4ziu63qe53keTfB4nmcmhBhjUsrvfOc7HS358TeYYq/cunXLdg0AMJ2klGmaPnnyxHXdc+fOpWmqtRZCNBoNKeXR0VGSJFevXuWcd7ScO3eOMZZlWRzHSqlGo4GFcFPpDK5MCgDjRzND+VzpboGphwQCAAA7MA8EAAB2IIEAAMAOJBAAANiBBAIAADuQQAAAYAcSCABsCh88OPPBB7arADuQQAAAYAcSCABsEsvLtksAa5BAAGCTs7BguwSwBgkEAAB2IIEAwD61u2u7BLAACQQA9unDQ9slgAVIIAAAsAMJBAAAdiCBAMAmrIWbZUggALAJnweaZUggAACwAwkEAPZhLdxsQgIBgH34PNBsQgIBAIAdSCAAALADCQQAAHYggQDAMlGr2S4B7EACAYBlbr3OFxdtVwEWnGm327ZrAACAWYQ+EAAA2IEEAgAAO5BAAABgBxIIAADseNV2AQAwzdI0VUp5nuc4DuecMaaU0lrTTznnnHNqcV2XMSalZIx9+umnn//85/M/dRxHCGHxiUAZ0AcCgLLEccw5j6JIKZUkCTUKIShsXNelTBJCJElC2eO6rpTyy1/+cpZl5qdSSsTPVEICAUBZtNaUHL7vU+r0wzmnBDL/zLLM3AlFEUwfJBAAlMXzvDiOwzCUUg5OIMaY7/txHJt/mhBK09TzvHILBUuQQABQFiFEEARRFDHG8unSE+fccRzT9fF9nwbuaBKo7FLBCiQQAJTFpI7rumb1wQAmdQylFGaAphjWwgFAWWhqh5YSmJG0JEmyLNNa09AcLT2gpQo0XWRCyPO8MAw3NjasPQEoGa4LBwAl0lorpY6dBILZhAQCAAA7MA8EAAB2IIEAAMAOJBAAANiBBAIAm2Sj8YPc1RBgpmAlAgAUSe3u6sPD59vNpm61aDvb2zPbcns7/ytnzpxpt9uvnD37hddff/ONN/7LF7/4+2++KWo1Z2FBLC+Ps3gYMyQQAAyS7e9n+/vd2/rw8DPte3v97sFZWBC12vPt+Xm+tETbfHGRLy4yxn70N3/zP//hH376i1/8v1//uvvXv/ibv/n7b77JFxedhQX6FVGrOfPzxTw9sAoJBDBzZKNBG/rwUO3umnbVbL5sf7Hdk1uvm+18urzcHqr7km5tPfq7v/sfSv3fX/3qtVdf/dwrrxz+67/Sj/7T5z//808/zd/4eSepVmOMuSsr5v8wQZBAABOv38BXPkU6Br460NHcbL9sX1427eM8vstGQzYa6dZWtrf3H+fm3l5cfOXs2Z9/+mnzZz+jG9QWF3/79dd/+atfvXr27P/+53/O/y6lI/WTMJRXcUgggMrpN/CVn0pRzaZJ
nW58aYkGuBhjNH5ltvPtZruy1O5uurVFUcQY+5M//MP/vLjYbrd/orVsNOgVcBYWvvKlL/3266+/Pjd39syZX7fb9KLlBwapt5QfypuIpz/1kEAApdOt1mcGuHoNfA2eSmG5ga/8VMroA1+TItvfpyiiV8yt192Vlf/69tt7//IvandXNZsmkilsRK0mlpf/w2uvzX/uc9QvpPzuSO78UB51+DDJNE5IIIBhmKmUju3RB77yg12Y2OjWEUWiVvNWV73VVb64mO3vq2ZzQCBRN4i9eMvo/3TLjnkvM5THGHNXVqY73S1CAgEw9tnBrn5riIcb+MpPpeD8ukC61Uq3tuT2drq1xRgTtZpbr3urqyYqTh5I5g6fp9HuLq306zmUR31QDOUVAgkE06nfwNfoa4jzA184AFWBiSKaGeJLS9Qr6ui1nDaQ8r/48r9eQ3kmkKirhKG8k0MCwcQ4duBruDXEDANfUyTd2qJFdPrw0FlY8FZX3ZUVb3W1+5ZDB5JBXSUzlMe6xl2xXvxYSCCwZogPz3fID3z1W0OMs9HZRFEkG41sb29wFJHRA4nkh/JYr0mm/FAe1osjgaAwY/jwPMPAF5xSx3pub3WVposGn5f0CyR3ZYUvLbkrK6c9s8kP5dFfxIChvNm59AMSCAap7IfnAU6Lokhub9MeS1Hkrqyc5IQm29+XjQbFhumX86Ulsbw8XCDlq6I/IjNzOVNDeUig2TJ9H54HOK0B67lPeA/UNyojkIyTrxef6KE8JNBkw4fnAYZGUaSaTbOemzpGpz2UjyGQCPWT8kN5k37pByRQteDD8wDj1/HRon7ruU9obIGUf0QzlDdZl35AApUOH54HmBS61aLPFdF6booisbw8YBHdscYfSEb1L/2ABDo1fHgeYBac/KNFp2IxkEilLv2ABOqU7e8nm5sMH54HAMbYZ78qgqIouHixqD/nAYHknz8/zrGNk1/6QdRq/nvvFfKgSKBOstG4cPs2PjwPAB3MR4uefu97JXVTTCDJ7W2KujIe5bQldVz6gTG28eGHhdw5EggAAOw4a7sAAACYUUggAACwAwkEAAB2vGq7gGpJ01Qp5Xme4zicc9vlAMBMkFIyxjjnnHOllNbacRzHcbIsoxvQj8ZfGBXjuq4pUgihtS6qMPSBXorjmHMeRZFSKkkS2+UAwKzgnGdZRodyIYSUUghB/3Rdl3OepmmapuMvTAiRJAllj+u6Ukpzdl5IYUigl7TWQgjGmO/7lPkAAGNACUTbWuuOXgXnPAgCc4Mx45xTAvX80YiFIYFe8jwvjuMwDKWUSCAAGCcTQmmaep6X/5HWmkZoLJXGfN+P47i7ffTCMA/0khCC+kBSyjiOgyCwXREAzArf98MwjKKIJoGoMcuyMAw5557nWUwgznl+UqrAwpBAL5nUoeFO2+UAQOWEYcgYi6KopPtXStF5MOGc+75f0mOdCgWk+WdRhSGBXjIrPaSUHb1gAADGWJIk6+vrJd2553lhGG5sbNA/pZRpmkopLeaQlJJWZtHsOC3RKrAwXJXnM7TWSilMAgFANyllqQk0g5BAAAAnkqapWSQNhUACAQCAHViNDQAAdiCBAADADiQQAADYgdXYL9F38erDQ7646NbrYnnZdkUAANNs1lciUOqYb2h3Fha+8ju/8/jv/5623ZUVUashjQDAOPPBBxs3b7orK6U+yoU7d/ji4tq1a6U+yhDCBw/Sra2dXhfpGcIs9oHoy95N6jDG3Ho9uHTJXVkxe5VJpjBNWZoijQBgzLL9fdsl9Jbt7RV1V7OSQGp3V25vq2aTxtlYr9TJQxoBAJRtmhOoZ+r458/3S51+kEYAAGWYtgTqTh1Rqw2ROv0gjQAAijINCZTt78tGQ25vd6SOWF5263Vnfr6kx0UaAUBJRK2mmk3bVfTgrqzEDx8WdW+TmkCUOtTXoWmx8aROP0gjAIDTmqQE6pk63uqqrdTpB2kEAHASVU+g7tThS0vuyop7+XKl
UqcfpBEADMd8XKSCZKNRyMx6FRNIt1pye5s+tZNPHXHxoruywhcXbRc4JKQRAEBeVRLIpA6tZGPTkjr9II0AAGwmUL/UocXT05c6/SCNAGA2jTuBulOHDrLe6qq3ujo7qdMP0ggAGGOVPRgWW9iYEqj7AqCUOjiYDoA0AphZSKBR9UydyPNw0BwC0ggApk/BCYTUGQOkEcCMyPb3q9kZKuq63QUk0Em+7ABKgjQCmGJIoN5O+2UHMAZIIwCYLMMkUPjgAV2ZbrgvO4AxGJBGwaVL0eXLdssDmFxuve4sLJT9KM7CwngeaAhuvV5Uz2yYb+nO9vez/X2kziSSjYazsIBuEABUwTAJBAAAMLqztgsAAIAZhQQCAAA7kEAAAGDHMWvhlFJaa9d1GWNSSsaYEEJrnWUZ3YBzzjkvu0oYDr1l9B7RW+k4juM4ePsArDv2z9NxHCEEbdMNaFsI4ThOeYX1POxnWVZGFhzTBxJCJElCj+e6rpTScRx6GNd1OedpmqZpetpHhfHgnGdZRu+XEEJKKYTA2wdQBYP/POmgb/480zR1Xdd13XwUlaTnYb+kLDh+FI5zTo/a80dBEJgMhKqhXZy2tdYdZyh4+wAsGvzn6TiO53nmBtQZon+OYdyi52G/jCw40TyQ7/txHHe3a63jOMYwTpWZvTxNU8/z8j/C2wdgV88/zyzLwjAMw/DKlSumkTaSJAmCYDy19TzsF54FJ7omAuc8PzrJXrxGnHPP83AIqzLf98MwjKKIRpmpEW8fQBX0/PPknPu+z14c1qMoovY4jk17qfNApoyOw37PxhEPJie9Kg+9Uvk66LWAiaCUMlOaDG8fwLDoMGhSoRAdf54GLUygbSmlmepXStGKgLJ1HPZ7No54MHnl1q1bA34spUzT9OjoiF4gKaXnedT45MmTLMt6vnBQKY7jhGFo3mi8fQBDu3LlShAEBY4c9PvzlFJKKefm5t59913GWBiGR0dH1MheTAuVZMBhv/AswFV5AABOREqZJMn6+rrtQqYHPpEKAHAiWutix98AfSAAALADfSAAALADCQQAAHYggWbLj//pn3SrZbsKAADGhviW7mRzkzHmv/deCcVA8dTurmo2s/191WzK7e0v/MZv/J9f/pIvLYnlZb605K6siFrNmZ+3XSYAfEayuckXFyv4VdSy0cj294uKgFMnkNze1q0WEqiyOiKHGilygkuX/uCtt1r/9m/00/jhw/jhQ/NTBBJAdaRbW4yxaiaQajatJRBfWlLNZiGPDYUYHDmDQyX/uwgkABizUycQWDdK5HQQy8tiebnnPSOQAKBswySQPjwsvA4YgLq9utUaPXIGQyABwDgNk0AYhSsbRQ4d+s2rXUbkDIZAArCFLy5m+/u2qygdRuEqoWfkiFpN1Gre6qqo1aowIYlAAhgbZ2GBIYGgJBMROYMhkABgREigMZmCyBkMgQRQLIzC9ZA/ysAAUx85gyGQAEaU7e3ZLqF0p04gZ2GhjDqmwIxHzmAIJADohlG44SFyhoZAAgA2dAKp3d1ZG457/nGcRiPb21O7u6aDjMgZHQIJYDYNmUCz8KHUfpHj1uve6qozP4/IKQkCCcBdWaFdfbphFO6lwZHDFxdFrTZrPb8qQCABTKuZTiBEziRCIAFMjdlaC4fImT4IJJhistGY7qH+Kf88ECJn1iCQACbIdI7CJZubcnsbkQMnDyS3Xsf3LgKM2Zl2u33a35GNRsXPHK/fu5ft74taDZEDA+QDSdRq0eXLtisCeI7Gbyp4pM3293WrVdRBdZgEAgAAGN1Z2wUAAMCMQgIBAIAdSCAAALCjx1o4pZTW2nVdxpiUkjEmhMiyrLtRa51lGf0W55xzPra6qQZ6UCrYcRzHcUw9juMIIczToUYhhOM4YysSKqvnTm5xf4apl6apUiqKouF+nQ5xxZaUR+W5riuEUEoppYIg6HnLMAyHfhbdevSBhBBJktCfpeu6Uko6mnc30p+o67qc8zRN
0zQtqqxjcc6zLKMChBBSSiGEqYcik+pJ09R1Xdd181EEM65q+zNMPc/zRvl1pVRRlXQLw9BxHMqVMAzpENrvxr7vF/jQvT8PxDmXUtIZ4uBG86MgCOI4LrCywSiBaFtr3XG66jiO53lUD72UdGOc1YJRqf0ZZgTtVFpr3/fpcKSUStOUc06N1NGRUkop6SgXRZGUMk1TM/BjMoB+l064B8fGAHQqT38IdPpF7UmS0E+DIMhXZc716adxHHPOafyp45am/kEP3+4lCIKdnZ0oimh7QOPGxka73T44OIiiaH19vee9lWRtbW1nZ4c2Dg4OTGMQBEEQeJ5HPzXFj7M2qL6q7c8w9TzPM0cq2uVoT6OW/Laxvr7+9OnT9os9s0MQBHSHOzs7dLMhbGxsdN95FEXmntfW1jpu31EDHWkH199P32simFgb3JhlWRiGnHPP88bcw/B9n0Yk8yOk5gRBax3HMcVvHMemEfNAYFRqf4apR/sbbdOGUop2uY5bmv734ENWEARJktBtih0cM1Pm+dGmfugvxXEcM81xwvrZ4Kvy0CF+cGO+S2iFUqpn35MWJjDGqDNoOrw9R11gZlVtf4YZQQdrzrkQomOXU0qZRBk8GUkDX+yzJ9ynRXOi5sDY74h6cievnzH2yq1btzqaaMzx6OiI6pBSep43oPHJkydZlo1Y9HAcxwnD0DwFUw+NQs7Nzb377rthGB4dHVELezEtBDOumvszTLEkSWiNGWMsTdNz585RPyPLsk8++eTo6ChN00ajIYSYm5v75JNPGGNSykajoZT6xje+wV50LNI0ffbsWb1eZ4zdvXuXVinfv39fCEGNpzU3Nzc3N3f37l0qcm5u7tmzZ/QoZsHOu+++SwdbKaVSyvyNKKXu379Pf0d0y6tXr/arvydclQcAYHxoMj8/NqW17h6eOeHNejYWVdgY7g0JBAAAduCaCAAAYAcSCAAA7EACAQCAHUggAACwYzq/pRvghOgLUtXubuMnP3nzjTechQV3ZYUvLvLFRdulAUy/U6+Fu3DnDmNs48MPy6mnGGc++GDj5k13ZcV2IVAt9M3H9M3c2f6+3N7O/3T5t35r92c/M/+kb3nnS0tieZn+P/Z6AabcqftAolZTzWYJlQAUTzYaFDYUPPrwkNopXYJLl6jHEz96lG5t/a/vf9+Zn38eTo2GPjzM9vfjhw/NvfGlJb64+DyZFhdxigMwIozCwfRQu7vZ3p7a3X3ey9nbo3ZKDv/8+Z69Gd1qpVtb/vnzzvw8Y6w7XUzPiTaSx49NkjkLC6JWE7WaMz9vNsb1dAEmHhIIJpXprGR7e9TLoXZKBW91lVLh2J4KJYr/3nv9buDMz7srKx33k+9dpVtbJu0YY269zhcXaUrJWVjA8B1AP8MkEEbhYPw6OiL5KRy3Xhe1mluvuysrQ/RCks1Nb3X1tEsPuoPN9MAoEeXjx2YELz+lRMl0qscCmFbDJJAZggAoz7FTOGJ5mbZHeZRkczPb21u7dm30gsXyslhe9lZXTUvHlFJ++K5jSgnDdzCbMAoHlZDvQND/qd1M4dBhuvARrXRr6yQjdcM5dkopP3zXMaWEFeEwC5BAYEG+c0BHZGrvmMIpu2egdnfl9vba+++X9xAdjp1Sktvb+VFuM6WEFeEwlU6dQBgrgNPqOPHPD6nlp3DGf9afPH7sLCwMWIMwHgOmlGj4Tj5+bH5kppSwIhymwDCfByqhDJgq+ZP6/KpoOnr2WxU9ZrrVSh4/Di5dslhDPwOmlBhjHSvCzZQSVoTDxMEoHIyqYw1Y95BaNc/W40ePGGPWO0AnNNyUEmMMFxmCKhsygXSrhfOs2fR8MfSL9V0dq6Ldet1bXZ2IM3H6FOrkHpoHTylle3s0pWRWhLv1ujM/j4sMQaUMmUCq2azaKS2URDYaPadwKGbMhW0m61BOi7C9IhZhV0rPKSV9eNjzIkOiVqPeUjU7qTALMAoHn2EuadN9YRuxvFyRKZzRJY8f
0/IH24WUjt6p/DM1U3TdFxnClBKMGRJoppnvJpisKZwRUa9unIuwK6XflFLPiwyZThK+twLKgASaIQO+m4BWRU/KFM6I0q0tvrQ0KWsQxoCmlDoan39ai5aZNJv5KSV8bwUUBauxp9kJv5tgps5q6eo41VyEXSmUSQMuMoTvrYDR4ROp02O47yaYNcnmJmMsuHjRdiGTB99bAYUbZhTOrdedhYXCSylQ9Sss3IU7d2hU7bTfTTBrsr0981VAMKJTfW9FcOlSdPmyjTKhuk79Ld1QTcnmJi6xDNVEvfPRL2QO0wcJBAAAdpy1XQAAAMwoJBAAANiBBAIAADuOWQuXZVmWZY7jCCGklIwxIYTjOIyxNE2VUp7nOY7DOR9HsSMIwzCKIvNPpVSapkIIz/MsVjUcpZTW2nVdxph5U7TWWZbRDTjn1X9HCkcvBe2r1EJ7L22b/bajnV6oGX/pCtdzF82yrLvRvCkwo9rH2djYMNtBENBGFEVPnz5tt9tra2umscp2dna6G/NPbbJ4nmeKN68/tezs7ERRtL6+bq04e2hvPDg4MC304jx9+jQIAtpjjY2NDfMa4qUrXM9dtGcjzLIhR+G01nSm6fs+ndQUQkoZxzFjLMuyMAyTJKHt69evx3GcJEkYhlprxliaptevX0/TlDFG7XRWpZSiX4zjmG4ppQzDUCl17KPT76ZpGsfxSW5vEeecnm/PHwVBYE7qZ4rWOggC2m3yhBBRFNHeMsAsv3SF67mLDthvYTYNmUCe58VxTMf9AhPIdV0KNs55FEV0LOCcO47jeZ7v++b4QqN/NIbmui7n3HVdrbWUMooi3/d936dbuq4bRdFJOvtpmgZB4HneRAzN+b5Pad1Bax3H8QwOJdFwseM4dObR7dh9YGZfupL03EX77bcwm4a8MqkQgqKCei1BEBRaVQ90XMgfXyhj6P80x6OUos7TcA9B8aa1dhzH9/2iKi8JBXP+hJ2eO+fc87wZPIymaWpOsZVSZjaoA+0zHY0z/tKVpHsX7dcIM+v4BMr3csxZpEkd13WL7VabgOl3Jmtwzmlu01TFORdCDB0eWZbRk6Jz4fzKhWryfT8ft5zz6gdnebTWpk8chmFHApndyRz7tNYmbGb8pStPxy46oBFm0yu3bt0afAvHce7fv390dJQkybe+9a25uTnG2Pe///1nz55Ro+d5586dK6qgRqPx5MkTpdSTJ0+ePHly9epVpRQVIIRIkkRKefXqVXP7b3/721EUUVV0bvXJJ58cHR2ladpoNIQQNFRId5hlGR2YejbevXuXIu3+/ftCiHq9XtSTKpaUMk1TekHon57nUWP+6cyUOI6fPXt27ty5c+fO0U5ydHSktabdQEr55MkT2nuVUo1Gg/5PO9KMv3RlGLCLdjTarhQsO9FVebTWSqmO+Z6ejYUw0z/D/foohZX3pKA6RtzBAKAouC4cAADYgWsiAACAHUggAACwAwkEAAB2IIEAAMAOJNDEix89qt28+bXvfU+3WrZrqbRkc9P50z8992d/Fj54YLuWmRM+eCAbDdtVQOUggSaYbrWufPRRmKZ/8NZbuz//+dvf/S7+yHtSu7sX7ty5/vHHX/3Sl/7b174WP3z4dhDgtRqn+OFDvODQbcir8oB1anf3+scfq2Zz7f33/ffey/b3r3z00YXbtyPPCy5etF1dVehWK370KH74kC8trd+44a2uMsb++9e/Hqbphdu3vdXVtfffd+bnbZcJMKOOvyYCVFC6tXXlo4/YmTMbN29+4ytfYYw5CwveH/0RHXCf/fKX7/7e78299prtMi1Lt7au/OVffvLjHweXLv31N78pajVq54uL/vnzc5/73P2//du7GxuMsXd/93dtFjoD7kopajV3ZcV2IVAt+ETq5AkfPIgfPnTr9fUbN7rP35PNzesffyxqtfUbN/jiopUKrcv296/fuye3t916PfI8sbzc72ZhmqZbW6JWizwPx8fyXLhzR9Rq0eXLtguB
akECTRKa+JHb28GlSwP+mNXu7oXbtxlja9eu0bjTTAkfPEgeP2aMRZ7nv/fesbeXjcb1e/eyvb3g0qXg4kUMypUBCQQ9IYEmhtrdvfLRR/rw8CQHVt1qXbh9WzWbg7Nqypgs8c+fjzzvVFlCPUtnYeGEuQWnggSCnpBAkyHZ3AzTlC8urr3/fr8xpW50VJ2F+XbdaoVpmjx+PMp4Wn7sbu3atZkdwyzDhTt3GGMbH35ouxCoFiTQBBglSIaLrskSP3oUP3rEGAsuXhx9HWC6tRU+eIBBuWIhgaAnJFClFTKYdqrhu8lilqR7q6uR5xXVa8mv4Y4uX57BubTCIYGgJyRQdRW4oOCESxgmSD4k1q5dK2MZm9rdDdNUbm8XG2+zCQkEPeHzQBWVbG7+8Z//ef2ttzZu3hz90ypzr7129Z13jv793+OHD5/84z9+46tfnehPCyWbm3/8gx9QoP71N79ZL+4revPOvfHG1XfeoY8NJZubutXCcu2hNX7602e/+MXVd96xXQhUC/pAlWMm1YdY0HWsdGvr+r17zsLC+o0bkzgtZGWxgHlHyutvTb3wwQPVbKIPBB1wXbhqyfb3L9y+nTx+HHne2rVrhU+De6urGzdvOvPzX791K9ncLPbOS6VbrfDBg7e/+13VbK7fuLHx4YdjGxZz5ufXrl2j1+3C7dvX793DRWABCoE+UIXIRuPKRx8xxtZv3Cj1RFu3Wtc//jjd2vLPn1+7dq28BypKdT40aj7uWsi6u9lBr9vBX/yF7UKgWpBAVRE/ehSmqajV6Fx7Kh9xCObCOYOvrzPjJVUffaKg/Vd/ZbsQqBYkkH0WeyRj63UNp8odjup0yyYCEgh6QgJZ1vElC+MvgL7WQTWblfpaB9lohGmqms0ylmMUxawIdxYWZvMSfCeHBIKekEA2URfE+sq0/Oq7KkwLXb93b4IWnuWX52GtVz805IsEgg5IIJvo0F+Rc3xaGleFiybEjx7pVmuyPjmbbG5m+/uTVfM46VZLNZvVP5+AMUMCAQCAHfg8EAAA2IEEAgAAO5BAAABgx6u2C5ghSimtteu6jDEpJWNMs2rF5gAAB6BJREFUCJFlWXej4zgl1ZBlWZZltM0555ybH6VpqpTyPE8I0d3oOE7+xmWUkW90HGcMZYzu2CfS8SLD6LTW5f2BwJihDzQ+QogkSShmXNeVUtJxtruxvBroaOi6Luc8TdM0Tak9DEPHcaIoyrIsSRJqjOOYcx5FkVLKNJZXhmmkYDa1lVfG6DjndPbAXtTfrxGKopSyXQIUBn2gseKcSynN4WlA4xgqCYIgjmPGmFLKHDQ9zwvDkG6jtaaOiO/7lJGllmE4juN5nmkcQxlTTymVpilFO2W8lFIpFQQBnXNwzn3fT5IkyzI6B9JaU4eYfpdzrrX2fZ9+Srf0PI9OFKIool+h0wW6kyAIzEM7jqO1DoIgSRLqxDiOQz3FKIp6Pm7PCqWUaZrSbkAt5iFOUiFUThvGKAiCnZ2dKIpoe0BjeTY2Ntrt9sHBQRRF6+vr7XabHjp/A7rN06dPoygKgoD+Waz19fWNjY319fUgCJ4+fUqNa2trQRAEQeB53s7ODjWWWkYhzCt2bKMtQRAcHBy02+2dnR3zaufLMzue7/t0S/NbZvfIb9Mtaf/pvpP2Z3dv8+v0Jq6trdGvD3jcARV2vKqnqhCqBn2gceOcm7O/wY0lybIsDEPOued5NEZED23Gi7TWtC2EoM6HlDKOYzqlLYOZ8jFntXQ2TeetYyvjtJIkoWqPbbQu3/kYXB71Tsy2Uop2mJ639Dyv3/3Qe5fv3Ju7NXvdgMc94fNijA1dIVQBEsgC3/e7/2B6NpbBHOUNz/OSJDFHdinl2toaY8wc7mmCqtgyHMehYxPnvPuonT8elVrGKMwZg8nsfo3WZVlGr2E+2rXW9FOzQdtmqp+eghBiiEylOxFCxHF8khHmjsftKCxfYYehK4QqQAKND41r09HWdV2aVO/Z
WGoNNIyezyE63MdxTIPvpt2szZNSFnsuacqIokgIQYsRHMcxQ/wsdyJcXhkjchyH3i+abxjQaB0tJqQZx/wiQypVa22m96l+OuJTL9nsG0op04UKw1ApRedMZoqFc25uqbWOoojWLppdi2ZlaPGn1prSmnb+jscdUCHnPAxD13XpGdEKyRNWCJVjexgQquLg4KB73qJn4/hVpIxuOzs7Zr5qcKN1PV/D7lJ7zkSe6vXf2NgwMzon//V+M6A9X8zuO6zsHgKD4bpwAPAcdcc9zxvzykxbjwvWIYEAAMAOfCIVAADsQAIBAIAdSCAAALADCQQAAHYggWy6cOdO+OCB7Sqek43GmQ8+sF3F8zJko2G7kFM788EH1Xk3ASYCEggAAOxAAgEAgB1IIKgWUavZLmFIk1s5gC1IIKgWZ37edglDchYWbJcAMGGQQAAAYAcSCKpINZu2SxhGtrdnuwSASYIEgirSrZbtEoYxoWUD2IIEAgAAO5BAAABgBxIIKmdCF5VN7io+AFuQQFA5E/rBGr60ZLsEgAmDBAIAADuQQACF0YeHtksAmCRIIKiiCf080ISWDWALEggAAOxAAgEAgB1IIAAAsONV2wXMNFGr8cVF21U85ywsuPW67SoYY8xdWZnEy9u4KyuYBwI4lTPtdtt2DQAAMIswCgcAAHYggQAAwA4kEAAA2IEEAgAAO5BAFaKUCsMwTdOeP03TNAzDMh70woULhd9tGbTWtksYktY6fKHfbQa/+wBTCauxK0QIIYSQUvb8qed5SqkyHrffI1aNUsp1XdtVDCNJkiAIHMcZcJvB7z7AVEICFSlNU6WU7/ucc6VUkiSe57muq5RK09RxHK21ORIlSZJlmed5dNobRVHP+zS/yzkvo2YhRP6wTg8nhMiyzHVdIURRDxTHsdbacRzHcbIsYy+ecs8Xp7sMKWWapnSA5pz7vj9iPfT608NprT3PoyfbXU/Pt5XaOedaa9/3B7ynSZJIKakD57qu67pSSqVUEARZliVJUsjTAZhIbShUEATtdnttbc1s5zcODg7Mdrvd9n1/fX294x42NjbM9sHBQRRFtP306VPP88qoOf+IQRAcHBy02+2dnZ2nT58W+yj0svi+3z7uxelZRr7OQvi+T49yknraubc1/77kt9t93tP8m046XvOe7QBTD32ggplTZjrfZ4xJKU0no2McxnEcz/MG3JtSyvRChBAldYPyfaAgCJIkoeILPzGn+vMvQr8Xp9QyDOqQ5R+9Xz0db6tSKsuynpM6x76nAGAggYpHAyxxHFN4CCHiOB5uAoNzbiYGtNY0eFWqLMuCIKCHi+O439hgUfq9OOMpQ2ttThRolGzAm5V/WznnQohRotGsqpjc5RUAo0MCFYymCjjn5hhKUzh05JJSmhPkMAxp+RN7MWFA21prKSXNDVCnIUkSOu92HCd/kl4GOtOn5CtwEogxRnMhruuaKE2ShJ5j94vTswzOeRiGNK/GOR+9q+E4DvW0GGN0b/3erI63lTpPdDN6XyiNut9Tepqm0byn1M4Y01rTApPud3/EZwdQfbgu3JjQgWa48MiyTGtdbB4MMEqpBT7iyRuHFoZhz97VCR9lxGIohksaWQWYCEggmFH5xYq2awGYUUggAACwA9dEAAAAO5BAAABgBxIIAADsQAIBAIAdSCAAALADCQQAAHb8f0Hf4725e+P4AAAAAElFTkSuQmCC",
      "text/plain": [
       "Tree('ROOT', [Tree('SINV', [Tree('S', [Tree('NP', [Tree('NNP', ['US'])]), Tree('VP', [Tree('VBZ', ['unveils']), Tree('NP', [Tree('NP', [Tree('NN', ['world']), Tree('POS', [\"'s\"])]), Tree('ADJP', [Tree('RBS', ['most']), Tree('JJ', ['powerful'])]), Tree('NN', ['supercomputer'])])])]), Tree(',', [',']), Tree('VP', [Tree('VBZ', ['beats'])]), Tree('NP', [Tree('NNP', ['China'])])])])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display\n",
    "os.environ['PATH'] = os.environ['PATH']+\";C:\\\\Program Files\\\\gs\\\\gs9.09\\\\bin\\\\\"\n",
    "display(result[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dependency parsing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 448,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[]<---US[compound]--->[]\n",
      "--------\n",
      "['US']<---unveils[nsubj]--->['supercomputer', ',']\n",
      "--------\n",
      "[]<---world[poss]--->[\"'s\"]\n",
      "--------\n",
      "[]<---'s[case]--->[]\n",
      "--------\n",
      "[]<---most[amod]--->[]\n",
      "--------\n",
      "[]<---powerful[compound]--->[]\n",
      "--------\n",
      "['world', 'most', 'powerful']<---supercomputer[appos]--->[]\n",
      "--------\n",
      "[]<---,[punct]--->[]\n",
      "--------\n",
      "['unveils']<---beats[ROOT]--->['China']\n",
      "--------\n",
      "[]<---China[dobj]--->[]\n",
      "--------\n"
     ]
    }
   ],
   "source": [
    "dependency_pattern = '{left}<---{word}[{w_type}]--->{right}\\n--------'\n",
    "for token in sentence_nlp:\n",
    "    print(dependency_pattern.format(word=token.orth_, \n",
    "                                  w_type=token.dep_,\n",
    "                                  left=[t.orth_ \n",
    "                                            for t \n",
    "                                            in token.lefts],\n",
    "                                  right=[t.orth_ \n",
    "                                             for t \n",
    "                                             in token.rights]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 449,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Program Files\\Anaconda3\\lib\\runpy.py:184: DeprecationWarning: Positional arguments to Doc.merge are deprecated. Instead, use the keyword arguments, for example tag=, lemma= or ent_type=.\n",
      "  \"__main__\", mod_spec)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" id=\"0\" class=\"displacy\" width=\"1040\" height=\"412.0\" style=\"max-width: none; height: 412.0px; color: #000000; background: #ffffff; font-family: Arial\">\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">US</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PROPN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"160\">unveils</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"160\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"270\">world</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"270\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"380\">'s</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"380\">PART</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"490\">most</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"490\">ADJ</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"600\">powerful</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"600\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"710\">supercomputer,</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"710\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"820\">beats</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"820\">NOUN</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"322.0\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"930\">China</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"930\">PROPN</tspan>\n",
       "</text>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-0\" stroke-width=\"2px\" d=\"M70,277.0 C70,222.0 140.0,222.0 140.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-0\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M70,279.0 L64,269.0 76,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-1\" stroke-width=\"2px\" d=\"M180,277.0 C180,2.0 820.0,2.0 820.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-1\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M180,279.0 L174,269.0 186,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-2\" stroke-width=\"2px\" d=\"M290,277.0 C290,112.0 700.0,112.0 700.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-2\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">poss</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M290,279.0 L284,269.0 296,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-3\" stroke-width=\"2px\" d=\"M290,277.0 C290,222.0 360.0,222.0 360.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-3\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">case</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M360.0,279.0 L366.0,269.0 354.0,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-4\" stroke-width=\"2px\" d=\"M510,277.0 C510,167.0 695.0,167.0 695.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-4\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M510,279.0 L504,269.0 516,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-5\" stroke-width=\"2px\" d=\"M620,277.0 C620,222.0 690.0,222.0 690.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-5\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M620,279.0 L614,269.0 626,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-6\" stroke-width=\"2px\" d=\"M180,277.0 C180,57.0 705.0,57.0 705.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-6\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">appos</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M705.0,279.0 L711.0,269.0 699.0,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0-7\" stroke-width=\"2px\" d=\"M840,277.0 C840,222.0 910.0,222.0 910.0,277.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0-7\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M910.0,279.0 L916.0,269.0 904.0,269.0\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from spacy import displacy\n",
    "\n",
    "displacy.render(sentence_nlp, jupyter=True, \n",
    "                options={'distance': 110,\n",
    "                         'arrow_stroke': 2,\n",
    "                         'arrow_width': 8})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 450,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(beats (unveils US (supercomputer (world 's) (powerful most))) China)\n"
     ]
    }
   ],
   "source": [
    "from nltk.parse.stanford import StanfordDependencyParser\n",
    "sdp = StanfordDependencyParser(path_to_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser.jar',\n",
    "                               path_to_models_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser-3.5.2-models.jar')    \n",
    "result = list(sdp.raw_parse(sentence))  \n",
    "dep_tree = [parse.tree() for parse in result][0]\n",
    "print(dep_tree)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 451,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAM0AAACtCAIAAACcKmxkAAAACXBIWXMAAA3XAAAN1wFCKJt4AAAAHXRFWHRTb2Z0d2FyZQBHUEwgR2hvc3RzY3JpcHQgOS4wOfoZEaQAAAygSURBVHic7Z0xj9tGGobHd8EV2YqFtwgC2JjSCg44EOoCxAUXB2zgznQbpKH/QACqdEkCrtyZ3SLd0qWjRlNsai2RQ4BVc1hiN02QXWAHyJ0XTrVXfPYcQ1IUJVLDIfU+hUHJJDWkXn0zGi0f3ru7u2MAbJm/dN0AsBMgZ0AHyBnQAXIGdICcAR0gZ0AHyNlq4jieTCYbby6lbLExPQU5W43ruk02T5KkrZb0l3uYp63DZDKxLIsxJqX0PI9zzhhLkiSOY845PWlZlhBCCME5T9M0CALGmBAijmPalnPueR7tkLa1bTtNU8dxbNvu7uC0cAdq4Lruzc0NLfu+f3d3d3NzEwQBPZNdJo6Pj09PT2l5NpsVd+j7Pu3w/PxcrTlgPuk65/2Ac041iTFGC0mSpGmaG7eFYUgLUkq1fim+70dRRKupIjdgkLO1oXE959y27WxEkiRRoYnjuHonaZr6vk97C8OQOtkB89cXL1503QbTiaIoSRIazsdx/Nlnn9m2bVlWmqZv3759//59HMeLxeLLL798+/YtY0wIsVgskiR58uQJ7YHqXBzHv/7662g0Yoy9evUqTVMp5ffff2/bNj05ZLruuPvEbDZTozTi5uYmN/wqrlO62rInhwq+bwIdYP4M6AA5AzpAzoAOkLP2+dcvv8jb265bYRaYP2sHeXsrzs7EYiEWi/+8f3/9++/OaOQ8euSMRvaDB123rnvwfbMRyeXlh3idnTHG+P6+8+jR3z///L9//KGetPb23PHYfvjQHY+tTz/tusndgJytTbZ0pVdXjLGK0hXP58nlZTyf05r2w4fOaOSOx7tW5JCzupSWLopXnSqVXl/H83lycSEWC/nunbW3R9s6jx7x+/e33/yOQc6qWKt01Yd2KM7OkosL9rHIUWrbarlpIGclNCxd9Umvr8VikVxcxPO5fPeOMaZGcgMrcsjZB7ZUuupDwzhV5Pj+Pg3jWg93J+x6zrSVrvrI21s1kusk8dtgF3PWeemqT+nHoI9TJDuUMwNLV33os9HfKZKB56xHpas+aookns9ZZh7Y5CmSYeas16VrLfoyRTKcnA2ydNWHpkjoDKh5YHOmSHqfs90pXfVZNkXijsddNamXOdvx0lUfNUWi5oG7OlF9yhlKVxO6PXum5wylq3WKp1TDFImhOUPp0sOyKZLW54GNy9nzoyOUrk6gP5XLTpG447F/eNjKzo3L2cHLl/z+fZSuDlFTJHx/P3j6tJV9GpczMEhwvRPQAXIGdICcAR2YnrOcyC5JkslkstIuBpZBTmchhJRSCKHEgEWaqJ9L0GonWp/z8/Pik7ujc2oX3/fp1M1mM8/z7irPZOmZ35h2rkcXQiRJ4vt+mqZRFJHwN03TMAzJuEn6Qsuy4jgWQjiO47puFEVkAXYcZ5lT2LZtkg5XsHNO4Y1I05Rz7jgOY8xxHHVWoyhiH/2S5DotnvnSt1KtmfU+L6WtwGY/GWQKpgX6WGRNwep/z8/PX79+fVfpFC79wOWe3DWn8GbMZrPiyQyCQJ06ei+y62cflr6Viqz3uZSt+zXoM2FZlrpdg+d5URTRv/QhKHUK12fXnMItQgJUxhjVpOqVi29lfe9zazlTr73y9h90SNmWFZ3Ca7FrTuHNsG07iiLqNxljSZI0HGCs5X1us55RTy+lJGUwuYNV6creFsRxnGfPnh0fH9ND6vjDMLRtW7Weyht9LVJ3eCh9Mo7jJEk45zSqaPGIhoRlWY7jTCYTx3HoREkp4zhO01S9QTSGK57k0reSc06j7TRNqXCoEJewcX9f5Pz8vMmXlCZe4J1yCjek1NS87b3h902gA9PnacEwQM6ADpAzoAPk
DOjAIA+yWCx+urz8x4MHpl1LDZpjRM7EYhFOp/SHwunVlTMa+YeHSNuQ6HheI5sw//DQ++or9QzSNiQ6y1kxYaX/i7QNgw5yVp2w0jWRtr6jNWf1E1a6FdLWXzTlbLOEle4BaesjW89Zcnk5ieMmCcuCtPWULeYsvb4Op9Po5KSVhGVB2nrHVnKmEmbt7XmPH7d17XwOpK1HtJyzXML8w8NtOzKQtl7QWs70JywL0mY4LeSs24RlQdqMpVHOzElYFqTNQDbMmZkJy4K0GcXaOZO3t+F0Gv7wg7EJy4K0GcIaOaOERScn8t07/+uvDU9YFqStc2rlLJswqmEm3GJjXZC2DlmRs2EkLAvS1gkrcvb86Cg6ORlGwrKotJ2+eAGZtwZW5Cy9vmaMDSlhWZLLS4RMD7geHegA19UBHSBnQAfIGdDBn67fJJGVZVm2bQsh2EffHwnGXNe1LGulLXY3WSk03HH+VM8oQ6SqIxsb6e8450EQkGmtm2YaT9YiCIqsvh5dSknJ8zyPilxbFD3Zpd5u0nJTtZBSuq5L2secn5sxRmu6rksaSxKIkkyUZKWWZZFhlDYn1WpWb0s+adq29HVLWyiEILMhY0xZJuu3cCco6vnUMomxT09PgyBQ6vkWKfVkl3q7Pc9T0kDaapmf2/O84+Pj3KsUl9XCzc0NHRrZpsmqv+x1K1qYOzlrtXAXWF3PbNumeka3z6B60Ar1PdlUadRyhZ/bsizXdZfth+oH3cBArU8LSiZd8bo1j4tVGsSrWzhU8jkrvgcqWzRia/G1Sz3Zpd5uKaUaaEspN/Zz005s2w7DsErau+R1cw2rMIs3NIgPj3zOXNcl77UQgkKgvngKIdr9IC7zZOe83Ywxy7Ko8lELS/3cjLHJZEI3gGKZoQ/nXK0ppQyCgL41q8Ok0RLpoqWUND4jt3TudStaSJpqurEL55y+m9ds4U5Q7EqL7urt2axL91z0dmfHWJu1qqiFrrN56euWtrB0h7CAK3rw+ybNp7iuW6enG8DrDpIe5AwMAPzuBHSAnAEdIGdAB0Z4kPVD159Of/75b5988vqbb3CVwLbZxe8BkzdvopMTxtg/v/ji37/9llxcuONx4LpD/fN0E9itnInF4vnRUXp15T1+HLguXX8aTqfhdMoY255CC+xKztLr6+dHR+LszH74MHDdXEcpb28ncUxKQHSj22D4OcuKGgLXrdZ7T+IY3eg2GHjOoh9/DKfT9OqqvqgB3eg2GGzOlH/ZGY0C113rOk10o60zwJypjpLv7wdPn7rj8Wb7QTfaIkPLGfV6LRqN0I22wnBytr3yg260OUPIWfZGBdvLAbrRJvQ+Z2pyX0+/hm50M3qcs9LJfQ2gG92AXuasenJfD+hG16JnOas/ua8HdKM16VPONpjc1wC60Tr0I2dNJvf1gG60GtNz1tbkvh7QjS7D6JzF8/nzo6N+3a4g243OvvsOhY0wOmfJ5WV0ctJH1bdYLOL5XOdsi+EYnTMwGHC9E9ABcgZ0gJwBHfTs+k2lGi0asshy1QvZE/neaLmiwRUH2z90yonaYpnsaZlGyjSCIMhJsioYhtlKaz2jkuN5Huc8a33KWYnXUgarbZsL5yuEy9m2VR/FSvFxFEVCCHL3OY5DlsyiW7nhsRiH5lxTySHp8DIrsVp5mTJYfcSziuHT01PXdRs2ryg+Lm1b8Sg2VjPnjqj4v6hnm6DqgfK+llqJ1cPqoUmSJEo4att285KWEx8va1vxKCA+rqaD7wHUR5C7lTFW30pchNy2tKzUsk3IiY8r2pY7iobi4zpu5V6jO2c0pqG7RpBnuWglpjVLlcH0UEophFDjmCiKSPaeq0AbkBMfL2tb8Sjqq5mjKFLP0CCP9ll0K5cebF/puuP+QBNlcPY2Fw1pKFxueBRFt/JgwO+b/wfi4+2BnAEd4HcnoAPkDOgAOQM6QM6ADozO2eTNm4OXL7tuxYaIxeLet9923QpTMDpnYDAgZ0AHyBnQ
AXIGdICcAR0gZ0AHyBnQAXIGdICcAR0gZ0AHyBnQAXIGdICcAR0gZ0AHyBnQAXIGdICcAR0Y7T/rnf44i7W354xGXbfCFHD9JtAB+k2gA+QM6AA5AzpAzoAOjMtZkiQHBwddt2KLDNWkV41xOWOMKUXjIFEavZ3CxHmNg4OD2WymHiqNfpqmjuMoIW1XhGFIblHLsshUSqLGUmt4rvFSSlqHMdZ7ReNadKv5KyWnRPR9nxzYLXobmzCbzci07Xne3SpreLHxw/Bnr4uJvwfkdIq+75MzVklfO4ekslkD9zIzt4GN7wQTc5ZDuYbpdjVm3llnmZm7F43XQA9yRqZ+Urh3PjhjjNHdTGiwReOzKIrInF00cxcbzzmfTCZ0/xTO+a7cWqDrjrsWTTzWOiltZ/HJvhxOi5j4fRMMDxPnz8DwQM6ADpAzoAPkDOgAOQM6QM6ADpAzoIP/AW1nV8SI0u3YAAAAAElFTkSuQmCC",
      "text/plain": [
       "Tree('beats', [Tree('unveils', ['US', Tree('supercomputer', [Tree('world', [\"'s\"]), Tree('powerful', ['most'])])]), 'China'])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display\n",
    "os.environ['PATH'] = os.environ['PATH']+\";C:\\\\Program Files\\\\gs\\\\gs9.09\\\\bin\\\\\"\n",
    "display(dep_tree)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 452,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
       " -->\r\n",
       "<!-- Title: G Pages: 1 -->\r\n",
       "<svg width=\"232pt\" height=\"479pt\"\r\n",
       " viewBox=\"0.00 0.00 232.00 479.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 475)\">\r\n",
       "<title>G</title>\r\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-475 228,-475 228,4 -4,4\"/>\r\n",
       "<!-- 0 -->\r\n",
       "<g id=\"node1\" class=\"node\"><title>0</title>\r\n",
       "<text text-anchor=\"middle\" x=\"115\" y=\"-449.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">0 (None)</text>\r\n",
       "</g>\r\n",
       "<!-- 9 -->\r\n",
       "<g id=\"node2\" class=\"node\"><title>9</title>\r\n",
       "<text text-anchor=\"middle\" x=\"115\" y=\"-362.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">9 (beats)</text>\r\n",
       "</g>\r\n",
       "<!-- 0&#45;&gt;9 -->\r\n",
       "<g id=\"edge1\" class=\"edge\"><title>0&#45;&gt;9</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M115,-434.799C115,-423.163 115,-407.548 115,-394.237\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"118.5,-394.175 115,-384.175 111.5,-394.175 118.5,-394.175\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"127\" y=\"-405.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">root</text>\r\n",
       "</g>\r\n",
       "<!-- 2 -->\r\n",
       "<g id=\"node4\" class=\"node\"><title>2</title>\r\n",
       "<text text-anchor=\"middle\" x=\"54\" y=\"-275.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">2 (unveils)</text>\r\n",
       "</g>\r\n",
       "<!-- 9&#45;&gt;2 -->\r\n",
       "<g id=\"edge8\" class=\"edge\"><title>9&#45;&gt;2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M100.113,-347.889C95.4456,-342.301 90.3738,-335.991 86,-330 80.3484,-322.259 74.58,-313.543 69.5253,-305.583\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"72.4344,-303.634 64.1661,-297.009 66.4985,-307.344 72.4344,-303.634\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"106\" y=\"-318.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">ccomp</text>\r\n",
       "</g>\r\n",
       "<!-- 10 -->\r\n",
       "<g id=\"node10\" class=\"node\"><title>10</title>\r\n",
       "<text text-anchor=\"middle\" x=\"147\" y=\"-275.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">10 (China)</text>\r\n",
       "</g>\r\n",
       "<!-- 9&#45;&gt;10 -->\r\n",
       "<g id=\"edge9\" class=\"edge\"><title>9&#45;&gt;10</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M121.476,-347.799C125.9,-336.047 131.851,-320.238 136.895,-306.842\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"140.286,-307.767 140.534,-297.175 133.735,-305.301 140.286,-307.767\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"148.5\" y=\"-318.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">nsubj</text>\r\n",
       "</g>\r\n",
       "<!-- 1 -->\r\n",
       "<g id=\"node3\" class=\"node\"><title>1</title>\r\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-188.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">1 (US)</text>\r\n",
       "</g>\r\n",
       "<!-- 2&#45;&gt;1 -->\r\n",
       "<g id=\"edge3\" class=\"edge\"><title>2&#45;&gt;1</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M48.5361,-260.799C44.8033,-249.047 39.7816,-233.238 35.5263,-219.842\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"38.819,-218.647 32.4557,-210.175 32.1474,-220.766 38.819,-218.647\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"58.5\" y=\"-231.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">nsubj</text>\r\n",
       "</g>\r\n",
       "<!-- 7 -->\r\n",
       "<g id=\"node5\" class=\"node\"><title>7</title>\r\n",
       "<text text-anchor=\"middle\" x=\"132\" y=\"-188.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">7 (supercomputer)</text>\r\n",
       "</g>\r\n",
       "<!-- 2&#45;&gt;7 -->\r\n",
       "<g id=\"edge2\" class=\"edge\"><title>2&#45;&gt;7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M69.7845,-260.799C81.0969,-248.471 96.5066,-231.679 109.16,-217.89\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"112.057,-219.91 116.239,-210.175 106.899,-215.177 112.057,-219.91\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"111\" y=\"-231.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">dobj</text>\r\n",
       "</g>\r\n",
       "<!-- 3 -->\r\n",
       "<g id=\"node6\" class=\"node\"><title>3</title>\r\n",
       "<text text-anchor=\"middle\" x=\"83\" y=\"-101.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">3 (world)</text>\r\n",
       "</g>\r\n",
       "<!-- 7&#45;&gt;3 -->\r\n",
       "<g id=\"edge6\" class=\"edge\"><title>7&#45;&gt;3</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M100.8,-173.948C94.5113,-169.01 88.702,-163.019 85,-156 81.3778,-149.132 80.041,-140.966 79.8353,-133.244\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"83.3365,-133.258 80.1476,-123.154 76.3398,-133.041 83.3365,-133.258\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"116.5\" y=\"-144.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">nmod:poss</text>\r\n",
       "</g>\r\n",
       "<!-- 6 -->\r\n",
       "<g id=\"node9\" class=\"node\"><title>6</title>\r\n",
       "<text text-anchor=\"middle\" x=\"178\" y=\"-101.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">6 (powerful)</text>\r\n",
       "</g>\r\n",
       "<!-- 7&#45;&gt;6 -->\r\n",
       "<g id=\"edge7\" class=\"edge\"><title>7&#45;&gt;6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M141.902,-173.986C145.166,-168.295 148.783,-161.898 152,-156 156.197,-148.306 160.64,-139.874 164.643,-132.163\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"167.786,-133.705 169.262,-123.213 161.566,-130.495 167.786,-133.705\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"175\" y=\"-144.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">amod</text>\r\n",
       "</g>\r\n",
       "<!-- 4 -->\r\n",
       "<g id=\"node7\" class=\"node\"><title>4</title>\r\n",
       "<text text-anchor=\"middle\" x=\"83\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">4 (&#39;s)</text>\r\n",
       "</g>\r\n",
       "<!-- 3&#45;&gt;4 -->\r\n",
       "<g id=\"edge4\" class=\"edge\"><title>3&#45;&gt;4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M83,-86.799C83,-75.1626 83,-59.5479 83,-46.2368\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"86.5001,-46.1754 83,-36.1754 79.5001,-46.1755 86.5001,-46.1754\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"95.5\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">case</text>\r\n",
       "</g>\r\n",
       "<!-- 5 -->\r\n",
       "<g id=\"node8\" class=\"node\"><title>5</title>\r\n",
       "<text text-anchor=\"middle\" x=\"178\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">5 (most)</text>\r\n",
       "</g>\r\n",
       "<!-- 6&#45;&gt;5 -->\r\n",
       "<g id=\"edge5\" class=\"edge\"><title>6&#45;&gt;5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M178,-86.799C178,-75.1626 178,-59.5479 178,-46.2368\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"181.5,-46.1754 178,-36.1754 174.5,-46.1755 181.5,-46.1754\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"201\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">advmod</text>\r\n",
       "</g>\r\n",
       "</g>\r\n",
       "</svg>\r\n"
      ],
      "text/plain": [
       "<graphviz.files.Source at 0x1f855bd40b8>"
      ]
     },
     "execution_count": 452,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from graphviz import Source\n",
    "\n",
    "dep_tree_dot_repr = [parse for parse in result][0].to_dot()\n",
    "source = Source(dep_tree_dot_repr, filename=\"dep_tree\", format=\"png\")\n",
    "source"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Named Entity Recognition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 453,
   "metadata": {},
   "outputs": [],
   "source": [
    "sentence = str(news_df.iloc[1].full_text)\n",
    "sentence_nlp = nlp(sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 454,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(US, 'GPE'), (China, 'GPE'), (US, 'GPE'), (China, 'GPE'), (Sunway, 'ORG'), (TaihuLight, 'ORG'), (200,000, 'CARDINAL'), (second, 'ORDINAL'), (Sunway, 'ORG'), (TaihuLight, 'ORG'), (93,000, 'CARDINAL'), (4,608, 'CARDINAL'), (two, 'CARDINAL')]\n"
     ]
    }
   ],
   "source": [
    "print([(word, word.ent_type_) for word in sentence_nlp if word.ent_type_])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 455,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div class=\"entities\" style=\"line-height: 2.5\">\n",
       "<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    US\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
       "</mark>\n",
       " unveils world's most powerful supercomputer, beats \n",
       "<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    China\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
       "</mark>\n",
       ". The \n",
       "<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    US\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
       "</mark>\n",
       " has unveiled the world's most powerful supercomputer called 'Summit', beating the previous record-holder \n",
       "<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    China\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
       "</mark>\n",
       "'s \n",
       "<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    Sunway TaihuLight\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
       "</mark>\n",
       ". With a peak performance of \n",
       "<mark class=\"entity\" style=\"background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    200,000\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">CARDINAL</span>\n",
       "</mark>\n",
       " trillion calculations per \n",
       "<mark class=\"entity\" style=\"background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    second\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORDINAL</span>\n",
       "</mark>\n",
       ", it is over twice as fast as \n",
       "<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    Sunway TaihuLight\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
       "</mark>\n",
       ", which is capable of \n",
       "<mark class=\"entity\" style=\"background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    93,000\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">CARDINAL</span>\n",
       "</mark>\n",
       " trillion calculations per second. Summit has \n",
       "<mark class=\"entity\" style=\"background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    4,608\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">CARDINAL</span>\n",
       "</mark>\n",
       " servers, which reportedly take up the size of \n",
       "<mark class=\"entity\" style=\"background: #e4e7d2; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
       "    two\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">CARDINAL</span>\n",
       "</mark>\n",
       " tennis courts.</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "displacy.render(sentence_nlp, style='ent', jupyter=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "named_entities = []\n",
    "for sentence in corpus:\n",
    "    temp_entity_name = ''\n",
    "    temp_named_entity = None\n",
    "    sentence = nlp(sentence)\n",
    "    for word in sentence:\n",
    "        term = word.text \n",
    "        tag = word.ent_type_\n",
    "        if tag:\n",
    "            temp_entity_name = ' '.join([temp_entity_name, term]).strip()\n",
    "            temp_named_entity = (temp_entity_name, tag)\n",
    "        else:\n",
    "            if temp_named_entity:\n",
    "                named_entities.append(temp_named_entity)\n",
    "                temp_entity_name = ''\n",
    "                temp_named_entity = None\n",
    "\n",
    "entity_frame = pd.DataFrame(named_entities, \n",
    "                            columns=['Entity Name', 'Entity Type'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Entity Name</th>\n",
       "      <td>US</td>\n",
       "      <td>India</td>\n",
       "      <td>Indian</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>Kim Jong - un</td>\n",
       "      <td>one</td>\n",
       "      <td>Apple</td>\n",
       "      <td>two</td>\n",
       "      <td>first</td>\n",
       "      <td>Messenger</td>\n",
       "      <td>China</td>\n",
       "      <td>Canadian</td>\n",
       "      <td>Facebook</td>\n",
       "      <td>Yahoo</td>\n",
       "      <td>Trump</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Entity Type</th>\n",
       "      <td>GPE</td>\n",
       "      <td>GPE</td>\n",
       "      <td>NORP</td>\n",
       "      <td>GPE</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>CARDINAL</td>\n",
       "      <td>ORG</td>\n",
       "      <td>CARDINAL</td>\n",
       "      <td>ORDINAL</td>\n",
       "      <td>PRODUCT</td>\n",
       "      <td>GPE</td>\n",
       "      <td>NORP</td>\n",
       "      <td>ORG</td>\n",
       "      <td>ORG</td>\n",
       "      <td>ORG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Frequency</th>\n",
       "      <td>30</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>10</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              0      1       2          3              4         5      6   \\\n",
       "Entity Name   US  India  Indian  Singapore  Kim Jong - un       one  Apple   \n",
       "Entity Type  GPE    GPE    NORP        GPE         PERSON  CARDINAL    ORG   \n",
       "Frequency     30     12      12         11             11        10      9   \n",
       "\n",
       "                   7        8          9      10        11        12     13  \\\n",
       "Entity Name       two    first  Messenger  China  Canadian  Facebook  Yahoo   \n",
       "Entity Type  CARDINAL  ORDINAL    PRODUCT    GPE      NORP       ORG    ORG   \n",
       "Frequency           8        8          7      7         6         6      6   \n",
       "\n",
       "                14  \n",
       "Entity Name  Trump  \n",
       "Entity Type    ORG  \n",
       "Frequency        6  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_entities = (entity_frame.groupby(by=['Entity Name', 'Entity Type'])\n",
    "                           .size()\n",
    "                           .sort_values(ascending=False)\n",
    "                           .reset_index().rename(columns={0 : 'Frequency'}))\n",
    "top_entities.T.iloc[:,:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Entity Type</th>\n",
       "      <td>PERSON</td>\n",
       "      <td>GPE</td>\n",
       "      <td>ORG</td>\n",
       "      <td>DATE</td>\n",
       "      <td>CARDINAL</td>\n",
       "      <td>NORP</td>\n",
       "      <td>EVENT</td>\n",
       "      <td>ORDINAL</td>\n",
       "      <td>PRODUCT</td>\n",
       "      <td>MONEY</td>\n",
       "      <td>TIME</td>\n",
       "      <td>LOC</td>\n",
       "      <td>FAC</td>\n",
       "      <td>QUANTITY</td>\n",
       "      <td>WORK_OF_ART</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Frequency</th>\n",
       "      <td>165</td>\n",
       "      <td>126</td>\n",
       "      <td>105</td>\n",
       "      <td>67</td>\n",
       "      <td>66</td>\n",
       "      <td>58</td>\n",
       "      <td>23</td>\n",
       "      <td>21</td>\n",
       "      <td>15</td>\n",
       "      <td>11</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 0    1    2     3         4     5      6        7        8   \\\n",
       "Entity Type  PERSON  GPE  ORG  DATE  CARDINAL  NORP  EVENT  ORDINAL  PRODUCT   \n",
       "Frequency       165  126  105    67        66    58     23       21       15   \n",
       "\n",
       "                9     10   11   12        13           14  \n",
       "Entity Type  MONEY  TIME  LOC  FAC  QUANTITY  WORK_OF_ART  \n",
       "Frequency       11     7    5    5         3            1  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_entities = (entity_frame.groupby(by=['Entity Type'])\n",
    "                           .size()\n",
    "                           .sort_values(ascending=False)\n",
    "                           .reset_index().rename(columns={0 : 'Frequency'}))\n",
    "top_entities.T.iloc[:,:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.tag import StanfordNERTagger\n",
    "import os\n",
    "\n",
    "java_path = r'C:\\Program Files\\Java\\jdk1.8.0_102\\bin\\java.exe'\n",
    "os.environ['JAVAHOME'] = java_path\n",
    "\n",
    "sn = StanfordNERTagger('E:/stanford/stanford-ner-2014-08-27/classifiers/english.all.3class.distsim.crf.ser.gz',\n",
    "                       path_to_jar='E:/stanford/stanford-ner-2014-08-27/stanford-ner.jar')\n",
    "\n",
    "ner_tagged_sentences = [sn.tag(sent.split()) for sent in corpus]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "named_entities = []\n",
    "for sentence in ner_tagged_sentences:\n",
    "    temp_entity_name = ''\n",
    "    temp_named_entity = None\n",
    "    for term, tag in sentence:\n",
    "        if tag != 'O':\n",
    "            temp_entity_name = ' '.join([temp_entity_name, term]).strip()\n",
    "            temp_named_entity = (temp_entity_name, tag)\n",
    "        else:\n",
    "            if temp_named_entity:\n",
    "                named_entities.append(temp_named_entity)\n",
    "                temp_entity_name = ''\n",
    "                temp_named_entity = None\n",
    "\n",
    "#named_entities = list(set(named_entities))\n",
    "entity_frame = pd.DataFrame(named_entities, \n",
    "                            columns=['Entity Name', 'Entity Type'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Entity Name</th>\n",
       "      <th>Entity Type</th>\n",
       "      <th>Frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>US</td>\n",
       "      <td>LOCATION</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Donald Trump</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>India</td>\n",
       "      <td>LOCATION</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Trump</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Singapore</td>\n",
       "      <td>LOCATION</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Kim Jong-un</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Facebook</td>\n",
       "      <td>ORGANIZATION</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Yahoo</td>\n",
       "      <td>ORGANIZATION</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Kim</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Nadal</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Google</td>\n",
       "      <td>ORGANIZATION</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Trudeau</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>China</td>\n",
       "      <td>LOCATION</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>North Korean</td>\n",
       "      <td>LOCATION</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Chhetri</td>\n",
       "      <td>PERSON</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Entity Name   Entity Type  Frequency\n",
       "0             US      LOCATION         31\n",
       "1   Donald Trump        PERSON         13\n",
       "2          India      LOCATION         13\n",
       "3          Trump        PERSON         12\n",
       "4      Singapore      LOCATION         11\n",
       "5    Kim Jong-un        PERSON          9\n",
       "6       Facebook  ORGANIZATION          9\n",
       "7          Yahoo  ORGANIZATION          6\n",
       "8            Kim        PERSON          6\n",
       "9          Nadal        PERSON          6\n",
       "10        Google  ORGANIZATION          5\n",
       "11       Trudeau        PERSON          5\n",
       "12         China      LOCATION          5\n",
       "13  North Korean      LOCATION          4\n",
       "14       Chhetri        PERSON          4"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of mentions of each distinct (entity name, entity type) pair, most frequent first.\n",
    "top_entities = (entity_frame.groupby(['Entity Name', 'Entity Type'])\n",
    "                            .size()\n",
    "                            .sort_values(ascending=False)\n",
    "                            .reset_index(name='Frequency'))\n",
    "top_entities.iloc[:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 462,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Entity Type</th>\n",
       "      <th>Frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PERSON</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>LOCATION</td>\n",
       "      <td>125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ORGANIZATION</td>\n",
       "      <td>54</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Entity Type  Frequency\n",
       "0        PERSON        186\n",
       "1      LOCATION        125\n",
       "2  ORGANIZATION         54"
      ]
     },
     "execution_count": 462,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of entity mentions per entity type, most frequent first.\n",
    "top_entities = (entity_frame.groupby('Entity Type')\n",
    "                            .size()\n",
    "                            .sort_values(ascending=False)\n",
    "                            .reset_index(name='Frequency'))\n",
    "top_entities.iloc[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Emotion and Sentiment Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from afinn import Afinn\n",
    "\n",
    "# Create the Afinn sentiment scorer with its default settings.\n",
    "af = Afinn()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score every article, then label it by the sign of its score.\n",
    "sentiment_scores = list(map(af.score, corpus))\n",
    "# (s > 0) - (s < 0) evaluates to 1, -1 or 0, i.e. the sign of the score.\n",
    "sentiment_category = [{1: 'positive', -1: 'negative', 0: 'neutral'}[(s > 0) - (s < 0)]\n",
    "                      for s in sentiment_scores]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">sentiment_score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>news_category</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>sports</th>\n",
       "      <td>25.0</td>\n",
       "      <td>2.16</td>\n",
       "      <td>7.363649</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>technology</th>\n",
       "      <td>24.0</td>\n",
       "      <td>-0.25</td>\n",
       "      <td>4.936554</td>\n",
       "      <td>-15.0</td>\n",
       "      <td>-4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>world</th>\n",
       "      <td>25.0</td>\n",
       "      <td>1.48</td>\n",
       "      <td>6.042351</td>\n",
       "      <td>-12.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              sentiment_score                                           \n",
       "                        count  mean       std   min  25%  50%  75%   max\n",
       "news_category                                                           \n",
       "sports                   25.0  2.16  7.363649 -10.0 -3.0  0.0  7.0  20.0\n",
       "technology               24.0 -0.25  4.936554 -15.0 -4.0  0.0  3.0   6.0\n",
       "world                    25.0  1.48  6.042351 -12.0 -1.0  1.0  5.0  16.0"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assemble one row per field (labelled through the index), then transpose so that each\n",
    "# field becomes a column and each article a row.\n",
    "df = pd.DataFrame([news_df['news_category'].tolist(), sentiment_scores, sentiment_category],\n",
    "                  index=['news_category', 'sentiment_score', 'sentiment_category']).T\n",
    "# The transposed frame holds generic objects; make the scores numeric so describe() summarises them.\n",
    "df = df.astype({'sentiment_score': 'float'})\n",
    "df.groupby('news_category').describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAEkCAYAAAARu8HuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3Xd8VfX9x/HXJyEQ9pa9FRXZIENE\nQNyzKqg4EEdtf0qttVpcqJW21tVaraMOUByoOHChInuICigCAjIElCmbsDM+vz/uTciE3OTenIz3\n8/G4j+TsNzfA535yzvkec3dERERERETkkLigA4iIiIiIiBQ3apRERERERESyUaMkIiIiIiKSjRol\nERERERGRbNQoiYiIiIiIZKNGSUREREREJBs1SiIiUWZmbmYDgjxmJBnM7GUz+zh26SRSQfwdEhGR\nrNQoiYjkg5l9ZGYT81h2fPiD7enhWQ2Aj4ouXa4iyfBH4KoYZgHAzIaE36cc72NxawzMrIOZfWBm\nG81sv5n9bGbvmlmzKB8nrya1OPwdAorfz0ZEpKioURIRyZ8XgVPNrHkuy64H1gCTANx9o7sfKLpo\nOUWSwd13uvuOWGcKSwX6mNmZRXS8iJlZXUI/y93AucBxwNXASqBaUWQoDn+HRETKOjVKIiL58wmw\nCbg280wzSyD0IXqku6eF52W/DO4+M1tjZgfCZyhGZ1o21cz+m22fWc4ymNlZZjbDzLab2TYz+9zM\njj9c2MwZwvvzXF5D8jjeVDN7xsz+YWZbzOxXM3vMzOIyrVPPzD40s33hP9u1ZrbIzB44wvu4H3ge\neDjz/nLJX93Mng8fO8nMpplZ10zLN5rZZZmmZ4XXKxeePib8Z2wUnr7YzBaE824L769eHofvBdQE\nrnX3ee6+2t2nuftf3H1hpmM2MrM3wz+X7Wb2iZkdk2n5A+H35HIzWxnON87M6qQvB64Bzs30M+kb\nXpb559c8PH15OPc+M/vOzNqbWVsz+9LM9pjZTDNrke19PN/M5oXPiq0ys7+bWflMy1eb2b1m9j8z\n22Vma83sjszLw9+ODWdYjYhIGaFGSUQkH9w9BXgFGJLtA/75QB1gVG7bmdklwO3ATcAxwHnANxEe\nvjLwBNAN6AvsBD7K/IH3CP5I6FKu9Nc9wF5g7mG2uRJIAU4ChgK3ApdlWv4K0Aw4FbiQ0KV7+b0s\n7a9Aq/AxcjAzI9SYNiL0fnUCpgOTzaxBeLVpQL/w+pWArsCB8FcIvU8r3H2dmdUH3gxnPh44BXj1\nMPk2EqqPA8JZcstYCZhCqPHrA/QENgATw8vSNSf0vl0EnBH+s/w9vOwx4G1gIod+Nl8eJtdfgYfD\n+9gBvAE8Rejn2Q1IBJ7MlPFM4HXgv8AJwHXAAOAf2fb7J2Ah0Dm8/0fMrGd42Ynhr78N5zsREZEy\nQo2SiEj+vQQ0BU7LNO96YIK7/5LHNs0IfYCe4O4/u/tcd/9vHuvmyt3fDb+Wu/sCQme1WhD6cJyf\n7XeGL+XaCBwN3Adc7e6LDrPZYne/z92XufvbhJqC/gBmdixwJvA7d5/t7vOBIUClPPeWNc+vhJqE\nEWZWIZdV+gEdgQHu/o27r3D34cBPhM7eAUwNrwehM0A/EWqu0uf1Da8D0BBIAN4Jnx1a5O4vuvum\nPPJ9RaiZeAXYZmYTzOxuy3p/0uWAETrrtMDdlwK/A6oQau7SlQOGhNeZTehsWv/wcXYD+4AD6T8f\ndz+Y9zvHv9x9fPhYjxNqfp5y9ynu/gOhhqhfpvXvAR5191HuvtLdpwDDgN9nawAnuPt/w+/zU8CK\nTBk3h9fZEc63GRGRMkKNkohIPrn7ckJnNq4DMLOGhBqGFw+z2VhCv+lfZWYvmdnAPJqDPJlZKzN7\nI3z51i5ClwDGEWraItlPc+A94EF3f+
8Iqy/INr0eOCr8/XFAGpnOSIUbxfURxHmc0Ptycy7LuhBq\nujab2e70F9CW0JkoCDVBrcM/g76EGrmp4e8hdJZnavj77wmdtVlkoQEZ/s9C9yHlyd3vAeoDNxI6\n23I9sNjM+mfK2AJIypRvJ6FL9lpl2tUad9+ZaTrz+xipzD+T9CZvYbZ5lTOd0eoC3JPtPXyD0BnK\n+nnst7AZRURKjXJBBxARKWFeBF4ws1qEzqJsAz7Ma2V3/yV8BqY/oTNRjwP3m1l3d99DqOHIfnlX\nQrbpj4B1hM5YrCN0SdxiIL+X3mFmVcI5P3f37Jde5SY5+x+FQ79cy/VytEi4+24ze5DQWaWR2RbH\nEfrQ3zuXTXeFt19iZpsINUZ9CV2aOAd4yszaELpsb2p43VQzOwPoQejyt+uBh8ysj7t/f5iMWwk1\numPN7C7gO2A4oYEe4oD5hM4sZbct0/eHex8jlXlffph5cZm+/pXQnyG7zGeGoplRRKTUUKMkIhKZ\ndwjdF3IVoTNLo909+wfNLNx9P6HLwj4xs38SugemFzCB0AfWBtk26QCsBjCz2oTuq7k5fOkUZtaZ\nCP7/Dt9T9TqQBNyQ3+0OYwmhD9JdgK/Dx2hM6BK3SDxP6N6nO7PN/xaoB6S5+0+H2X4aoVHpugLT\n3P1XM9sC/IXw/UnpK7q7A7OB2eEG7QdC9w7l2Shl5u4HzWwlh/6M3wKDgC2FHDHwIBBfiO0P51vg\nOHdfUcj9JBO7jCIixZYaJRGRCLj7PjN7A3iA0GVWLx1ufQuNLFeOUEOxm9CH82RgeXiVycATZnYB\n8COhs0ZNCDdKwHZgC/BbM/uF0JmSRwmdVcqv+wkNNnAaUDPT7Sk73X1fBPsBwN1/NLPPgefM7P8I\nDWjwKKEBIvywG2fdT4qZ3Q2MzrZoIjAL+MDM/gIsJXSp2FnARHefEV5vKqGmdWn4vicINU9XkWlw\nDTPrQejP/jmhM1WdCL3Hi3PLZWbnETpT9CawjNAZtPOBcwi9lxBqPG8PZ7wP+Dm8zwuB58KXaebH\nauDs8FnHrYR+JodtvCPwIPCxma0hNGhECqHLF7u5+18i2M9qoL+ZTSN0P9X2KOUTESnWdGpdRCRy\nLxJqkr509yVHWHcHoUu9ZgCLgEuAi919VXj5yEyvWYSaqffTNw4POX4Z0D68/dOELv+K5Bk7fYC6\nhM6ebMj0uuxwGx3BEGAtoWblQ0KNw6+EmqZ8c/d3yHaPTPjszzmEmsgXCDWQbwPHkvU+qCmEznRM\nPcK8nYTO4H1MqEF9HBjh7q/lEWsxoZ/DY4Qut/uGUPN1O+ER49x9L6HR834idGnbUkKDP9Qk1Nzm\n1wuEztDNJXR2sVcE2x6Wu39O6IxbP0J/hm8Inb37OcJd/Tm8j18IvR8iImWCheqRiIhIwYWfDbQe\nGOTu7wadR0REpLB06Z2IiETMzE4FqhIade0oQs8G2gJ8FmQuERGRaFGjJCIiBZEA/A1oSejepK+B\nU8Ij+YmIiJR4uvROREREREQkGw3mICIiIiIiko0aJRERERERkWzUKImIiIiIiGSjRklERERERCQb\nNUoiIiIiIiLZqFESERERERHJRo2SiIiIiIhINmqUREREREREslGjJCIiIiIiko0aJRERERERkWzU\nKImIiIiIiGSjRklERERERCQbNUoiIiIiIiLZqFESERERERHJplzQAaKlTp063rx586BjiIiUafPm\nzdvi7nWDzlEcqU6JiAQvkjpVahql5s2bM3fu3KBjiIiUaWa2JugMxZXqlIhI8CKpU7r0TkRERERE\nJBs1SiIiIiIiItmoURIREREREcmm1NyjlJvk5GTWrl3L/v37g45S6iUmJtK4cWMSEhKCjiIiUmKo\nThUd1SkRiVSpbpTWrl1L1apVad68OWYWdJxSy93ZunUra9eupUWLFkHHERGJGjNrAowG6gNpwPPu\n/h
8zqwW8BTQHVgOXuvv2SPevOlU0VKdEpCACvfTOzJqY2RQzW2JmP5jZH8Pza5nZF2a2PPy1ZkH2\nv3//fmrXrq3iE2NmRu3atfUbUREpjVKAP7v78UAP4GYzawPcCUxy92OASeHpiKlOFQ3VKREpiKDv\nUYppAQJUfIqI3uco2PYT7NsRdAoRycTdN7j7t+Hvk4AlQCPgQuCV8GqvAL8p6DH0/2fR0PssIpEK\n9NI7d98AbAh/n2RmmQtQ3/BqrwBTgWEBRCw2xo0bR+vWrWnTpk3QUSTa9myFNy6FdXOhXCKcOhxO\nGhp0KhHJxsyaA52Ar4F64RqGu28ws6MCjFYsqE6VfKNHj2bNmrwfMbNx40YA6tevn+c6zZo1Y/Dg\nwVHPJhKEoM8oZThcAQJUgMaNY/HixTE9Rmpqakz3L3mY+a9QkwSQsh++uA92rgs2k4hkYWZVgHeB\nW919VwTb3Whmc81s7ubNm2MXsBhQnSr9Dhw4wIEDB4KOIVJ03D3wF1AFmAdcHJ7ekW359jy2uxGY\nC8xt2rSpZ7d48eIc87JbtWqVH3fccX7DDTd4mzZt/PTTT/e9e/f6ihUr/Mwzz/TOnTv7ySef7EuW\nLPGUlBRv0aKFp6Wl+fbt293MfNq0ae7ufvLJJ/vy5ct96tSp3qFDB+/QoYN37NjRd+3aleexH374\nYW/btq23b9/ehw0b5u7uzz//vHft2tXbt2/vF198se/Zs8dnzZrlNWvW9ObNm3uHDh18xYoVueZz\nd1+xYoV3797du3bt6sOHD/fKlSu7u3taWprffvvtfsIJJ3jbtm39zTffdHf3KVOmeN++fX3QoEF+\n/PHH+7333utPPPFERsa7777b//Of/xzxfczv+y25eP1S9/urZX39ND3oVCIFAsz1YlBXovkCEoDP\ngdsyzfsRaBD+vgHw45H206VLlxzvl+qU6lRJ8uCDD/qDDz4YdAyRQomkTqkArVrl8fHx/t1337m7\n+8CBA/3VV1/1U0891ZctW+bu7l999ZX369fP3d3PPPNMX7RokX/00UfetWtX/9vf/ub79+/35s2b\nu7v7eeed5zNnznR396SkJE9OTs71uOPHj/eePXv6nj173N1969at7u6+ZcuWjHXuuecef/LJJ93d\n/ZprrvGxY8dmLMsr37nnnutvvPGGu7s/++yzGQXonXfe8dNOO81TUlJ848aN3qRJE1+/fr1PmTLF\nK1Wq5D/99FPG+9GpUyd3d09NTfWWLVtmyXQ4KkAFNPflrE3SI0e7H9wXdCqRAiltjRJghEa9eyLb\n/EeBO8Pf3wk8cqR9qU6pTpV0apSkNIikTgV6j5KF7qx8CVji7v/KtOhD4Brgn+GvH8QyR4sWLejY\nsSMAXbp0YfXq1Xz55ZcMHDgwY530U829e/dm+vTprFq1irvuuosXXniBPn36cOKJJwLQq1cvbrvt\nNq688kouvvhiGjdunOsxJ06cyLXXXkulSpUAqFWrFgCLFi3i3nvvZceOHezevZszzzwzx7a7d+/O\nM9/s2bMZN24cAFdccQW33347ADNnzmTQoEHEx8dTr149+vTpw5w5c6hWrRrdunXLGC61efPm1K5d\nm++++45NmzbRqVMnateuXcB3VvKlyzWQvBcWvA3VGkK/uyEhMehUIhLSC7gaWGhm88Pz7iZUn942\ns+uBn4GBeWwfFapTqlMiUvSCfo5SsShAFSpUyPg+Pj6eTZs2UaNGDebPn59j3d69e/Pcc8+xfv16\nHnzwQR599FGmTp3KKaecAsCdd97Jueeey/jx4+nRowcTJ07kuOOOy7Efd891BJ4hQ4Ywbtw4OnTo\nwMsvv8zUqVNzrJOWlpZnvryEGujcVa5cOcv0DTfcwMsvv8zGjRu57rrr8n0MKYQe/xd6iUix4u4z\nCZ1Vyk3/osqhOqU6JSJFL9DBHNx9prubu7d3947h13h33+ru/d39
mPDXbUWZq1q1arRo0YKxY8em\n5+T7778HoHv37nz55ZfExcWRmJhIx44d+d///kfv3r0BWLlyJe3atWPYsGF07dqVpUuX5nqMM844\ng5EjR7J3714Atm0L/RGTkpJo0KABycnJvP766xnrV61alaSkpCPm69GjB++++y4Ab775Zsb2p5xy\nCm+99Rapqals3ryZ6dOn061bt1yzXXTRRXz22WfMmTMn198UiohIsFSnVKdEJPaKzah3xc3rr7/O\nSy+9RIcOHTjhhBP44IPQ1X8VKlSgSZMm9OjRAwj95i4pKYl27doB8MQTT9C2bVs6dOhAxYoVOfvs\ns3Pd/1lnncUFF1xA165d6dixI4899hgAI0aMoHv37px++ulZfsN3+eWX8+ijj9KpUydWrlyZZ74n\nnniCf/3rX3Tr1o0NGzZQvXp1IFRU2rdvT4cOHTj11FN55JFH8hzes3z58vTr149LL72U+Pj4KLyb\nIiISbapTqlMiElt2uFPdJUnXrl197ty5WeYtWbKE448/PqBEwdi7dy8VK1bEzHjzzTcZM2ZMRnHK\nr7S0NDp37szYsWM55phj8r1dWXy/RSQrM5vn7l2DzlEcqU6FqE6VXCNGjABg+PDhAScRKbhI6lTQ\n9yhJlM2bN4+hQ4fi7tSoUYORI0dGtP3ixYs577zzuOiiiyIqPiIiIvmhOiUiJYUapRhbuHAhV199\ndZZ5FSpU4Ouvv47J8Xr37p1xHXhBtGnThp9++imKiUREpDhTnRIRyZ0apRhr165dRKP+iIiIFCXV\nKRGR3GkwBxERERERkWzUKImIiIiIiGSjRklERERERCQbNUoiIiIiIiLZqFGKoR07dvDMM88UaNsh\nQ4bwzjvvRCVH3759yf7sDhEREVCtEhHJi0a9C0tLcz78fj0vzVzFhp37aFC9Itef3IILOjQkLs4K\ntM/04nPTTTdFOa2IiJRGI/75ENt27Yja/mpVq8HwO+867DqqVSIiudMZJUJN0u9fm8fd7y9k4bqd\nbNl9kIXrdnLXewv5/WvzSEvzAu33zjvvZOXKlXTs2JE77riDRx99lBNPPJH27dtz//33Z6w3evRo\n2rdvT4cOHbI8y2L69OmcdNJJtGzZMuM3dlOnTqVv374MGDCA4447jiuvvBL3UL5JkybRqVMn2rVr\nx3XXXceBAwdyZBozZgzt2rWjbdu2DBs2LGP+Sy+9ROvWrenbty+//e1vGTp0KElJSbRo0YLk5GQA\ndu3aRfPmzTOmRUQkurbt2kFc7xOi9spP06VaJSKSOzVKwIffr2fmii3sPZiaZf6+5FRmLN/CRwvW\nF2i///znP2nVqhXz58/n9NNPZ/ny5XzzzTfMnz+fefPmMX36dH744Qf+/ve/M3nyZL7//nv+85//\nZGy/YcMGZs6cyccff8ydd96ZMf+7777jiSeeYPHixfz000/MmjWL/fv3M2TIEN566y0WLlxISkoK\nzz77bJY869evZ9iwYUyePJn58+czZ84cxo0bx/r16xkxYgRfffUVX3zxBUuXLgWgatWq9O3bl08+\n+QSAN998k0suuYSEhIQCvR8iIlL8qFaJSJC2b9/Ogw8+yI4d0TubHi1qlICXZq7K0SSl25ecyosz\nVhX6GBMmTGDChAl06tSJzp07s3TpUpYvX87kyZMZMGAAderUAaBWrVoZ2/zmN78hLi6ONm3asGnT\npoz53bp1o3HjxsTFxdGxY0dWr17Njz/+SIsWLWjdujUA11xzDdOnT8+SYc6cOfTt25e6detSrlw5\nrrzySqZPn84333xDnz59qFWrFgkJCQwcODBjmxtuuIFRo0YBMGrUKK699tpCvxciIlI8qVaJSFF7\n//33+fHHH3nvvfeCjpKDGiVgw859hVqeH+7OXXfdxfz585k/fz4rVqzg+uuvx90xy/0eqAoVKmTZ\nPrf58fHxpKSkZFl+uAyRzAfo
1asXq1evZtq0aaSmptK2bdsjHkdEREom1SoRKUrbt29n2rRpuDvT\np08vdmeV1CgBDapXLNTyvFStWpWkpCQAzjzzTEaOHMnu3bsBWLduHb/++iv9+/fn7bffZuvWrQBs\n27atQMc67rjjWL16NStWrADg1VdfpU+fPlnW6d69O9OmTWPLli2kpqYyZswY+vTpQ7du3Zg2bRrb\nt28nJSWFd999N8t2gwcPZtCgQfoNnUiElm9fzrDpw7hp4k1MWjMp6DgiuVKtEpGgvP/++xm/BElL\nSyt2Z5XUKAHXn9yCignxuS6rmBDPDb1bFGi/tWvXplevXrRt25YvvviCK664gp49e9KuXTsGDBhA\nUlISJ5xwAvfccw99+vShQ4cO3HbbbQU6VmJiIqNGjWLgwIG0a9eOuLg4fv/732dZp0GDBjz00EP0\n69ePDh060LlzZy688EIaNWrE3XffTffu3TnttNNo06YN1atXz9juyiuvZPv27QwaNKhA2UTKoqSD\nSQz5bAjjV41nxroZ3Dr1Vr5c/2XQsURyUK0SkaDMmjWLlJQUAFJSUpg1a1bAibKy/JwGLwm6du3q\n2Z+/sGTJEo4//vgjbps+6t2M5VvYl3zoXqWKCfH0PqYOz13VpcBDhJcUu3fvpkqVKqSkpHDRRRdx\n3XXXcdFFFwHwzjvv8MEHH/Dqq68edh/5fb9FyoIJqyfw52l/zjLvkmMu4YGTHggmUBExs3nu3jXo\nHMVRfupUEMODlySFrVWqU4UzYsQIAIYPHx5wEiktRo4cydSpU0lJSaFcuXL07duX6667LqbHjKRO\n6TlKQFyc8dxVXfhowXpenHHoOUo39G7B+e0L/hylkuSBBx5g4sSJ7N+/nzPOOIPf/OY3APzhD3/g\n008/Zfz48QEnFClZGlZpmGNeg8oNAkgihWVmI4HzgF/dvW143gPAb4HN4dXudvdC/0dZmpqaWFCt\nEildLrroIqZNmwZAXFwcF198ccCJsgq8USrKAnQ4cXHGhR0bcWHHRrE8TLH12GOP5Tr/qaeeKuIk\nIqVD2zptuezYy3j7x7dxnBNqn8Dlx10edCwpmJeB/wKjs83/t7vn/p+nxIRqlUjpUrNmTfr06cOk\nSZM45ZRTqFGjRtCRsgi8UUIFSERKqXt73MuQE4aQdDCJ42vrcp+Syt2nm1nzoHOIiJRGF110EWvX\nri12Z5OgGDRKKkAiUpo1rto46AgSO0PNbDAwF/izu28POpCISElTs2ZN7rvvvqBj5Ko4j3o31MwW\nmNlIM6sZdBgREZFMngVaAR2BDcDjua1kZjea2Vwzm7t58+bcVhERkWKquDZKKkAiIlJsufsmd091\n9zTgBaBbHus97+5d3b1r3bp1izakiIgUSrFslFSADm/q1Kl8+aWexyIiEhQzyzyE4UXAoqCyFEeq\nUyJSGgR+j1JuzKyBu28ITxZNAUpLg0XvwOynYdc6qNYIet4MbQdAXPHpJ1NSUpg6dSpVqlThpJNO\nCjqOiEipZ2ZjgL5AHTNbC9wP9DWzjoADq4HfReNYj/9jBEk7tkVjVwBUrVGLP99dtM+8UZ0SkdIi\n8EapKAtQntLS4K2r4KcpkLw3NG/PZvjoj7D4A7j01QI3S3v27OHSSy9l7dq1pKamMnz4cIYNG8Zl\nl13GlClTAHjjjTc4+uijWbNmDddddx2bN2+mbt26jBo1iqZNmzJkyBBq1arFd999R61atZg1axbx\n8fG89tprPPXUU2zcuJG//vWvxMfHU716daZPnx6td0aKysZF8MP7UK0BdBgE5SsHnUhEwtx9UC6z\nX4rFsZJ2bOOmFglR298zq47cdKlOiYjkLvBGqSgLUJ4WvZO1SUqXvBdWToZF70L7gQXa9WeffUbD\nhg355JNPANi5cyfDhg2jWrVqfPPNN4wePZpbb72Vjz/+mKFDhzJ48GCuueYaRo4cyS233MK4ce
MA\nWLZsGRMnTiQ+Pp4HHniAKlWqcPvttwPQrl07Pv/8cxo1asSOHdF7orsUkTVfwisXQFpyaHrB23D9\nhGAziUiZoTolIpK74nNNWZBmP52zSUqXvBdm/7fAu27Xrh0TJ05k2LBhzJgxg+rVqwMwaNCgjK+z\nZ88OxZg9myuuuAKAq6++mpkzZ2bsZ+DAgcTHx+d6jF69ejFkyBBeeOEFUlNTC5xVAvLNC4eaJIBf\nvoZ184LLIyJliuqUiEjuAj+jVCzsWle45YfRunVr5s2bx/jx47nrrrs444wzADCzjHUyf59Z5vmV\nK+d9KdZzzz3H119/zSeffELHjh2ZP38+tWvXLnBmKWLx5fM3T0QkBlSnRERypzNKEBq4oTDLD2P9\n+vVUqlSJq666ittvv51vv/0WgLfeeivja8+ePQE46aSTePPNNwF4/fXXOfnkk3PdZ9WqVUlKSsqY\nXrlyJd27d+fBBx+kTp06/PLLLwXOKwHoeTOUr3Jo+thzoH674PKISJmiOiUikjudUYLQB9WP/pj7\n5XcJlaDn0ALveuHChdxxxx3ExcWRkJDAs88+y4ABAzhw4ADdu3cnLS2NMWPGAPDkk09y3XXX8eij\nj2bcJJub888/nwEDBvDBBx/w1FNP8e9//5vly5fj7vTv358OHToUOK8EoEF7GDoHfhwPVRtC6zOD\nTiQiZYjqlIhI7szdg84QFV27dvW5c+dmmbdkyRKOP/74I2+c26h3EGqSWp1aqFHvctO8eXPmzp1L\nnTp1orbP4iDf77dIGTHp50k8+e2TJB1M4pLWl3BTh5vyvISptDCzee7eNegcxVF+6lRxGR5cdUpy\nM2LECACGDy/aIeelZBs9ejRr1qzJc/nGjRsBqF+/fp7rNGvWjMGDB0clTyR1SmeUINQEXfZaaHS7\n2f/N9BylodD2kmL1HCURKRk27tnI7dNuJyUtBYDnvn+OJlWbcEGrCwJOJsVZUT/zSEQkaAcOHAg6\nQp7UKKWLiwsNAV7AYcAjsXr16pgfQ0SCNf/X+RlNUrpvNnyjRklKBNUpEYmWI50JKs5nKnWqREQk\nBtrUboOR9TK7tnXaBpRGREREIqVGSUQkBppWa8q9Pe6lRoUalIsrxyXHXMKA1gOCjiUiIiL5pEvv\nRIqD3Zvhq2egTmvoOCjoNBIllx57KQNaDyA1LZWE+ISg44iIiEgE1CiJBG3lFHjtYvC00PSMx+AP\n84LNJFETZ3HExevkvYiISEmj6l1C9O3bl+zDygK8/PLLDB1a8Oc8STEw/o5DTRLA1hWwbEJweURE\nCkB1SkRKG51RCkvzNMavGs+ri19l456N1K9cn6vbXM05Lc4hzoLtJ1NTUwM9vsTYwaSc83auLfoc\nIhK4fzz0MNt37Ira/mrWqMZA645JAAAgAElEQVTddw2L2v7yojolIqWRGiVCTdKtU27lqw1fsS9l\nHwDb9m/jwdkP8sXqL/h3v38XuFl65JFHSExM5JZbbuFPf/oT33//PZMnT2bSpEmMGjWKc889l3/8\n4x+4O+eeey4PP/wwAFWqVOG2227j888/5/HHH8+yz1GjRvHQQw/RoEEDWrduTYUKFQr3BkiwOg+B\naf88NB1fATpH56FqIlKybN+xixbtzo7a/lYt/PSI66hOiYjkTpfeAeNXjc/SJKXbl7KP2Rtm8+mq\nIxeavJxyyinMmDEDgLlz57J7926Sk5OZOXMmxxxzDMOGDWPy5MnMnz+fOXPmMG7cOAD27NlD27Zt\n+frrrzn55JMz9rdhwwbuv/9+Zs2axRdffMHixYsLnE2KiX53wekPQq1W0LQn/H4GxOt3GCJSNFSn\nRERyp0YJeHXxqzmapHT7UvYxevHoAu+7S5cuzJs3j6SkJCpUqEDPnj2ZO3cuM2bMoEaNGvTt25e6\ndetSrlw5rrzySqZPnw5AfHw8l1xySY79ff311xnblC9fns
suu6zA2aQYqdoQarcKjXpXLjHoNCJS\nhqhOiYjkTo0SsHHPxsMu37RnU4H3nZCQQPPmzRk1ahQnnXQSvXv3ZsqUKaxcuZKmTZvmuV1iYiLx\n8fG5LjOzXOdLCbXwHXjvBlg+Ab59BV45D1KTg04lImWE6pSISO7UKAH1K9c/7PJ6lesVav+nnHIK\njz32GKeccgq9e/fmueeeo2PHjvTo0YNp06axZcsWUlNTGTNmDH369Dnsvrp3787UqVPZunUrycnJ\njB07tlDZpBhY9F7W6R0/wy/fBJNFRMok1SkRkZzUKAFXt7maiuUq5rqsYrmKDG5TuBvre/fuzYYN\nG+jZsyf16tUjMTGR3r1706BBAx566CH69etHhw4d6Ny5MxdeeOFh99WgQQMeeOABevbsyWmnnUbn\nzp0LlU2KgWoNs82wXOaJSFDMbKSZ/WpmizLNq2VmX5jZ8vDXmkFmLCzVKRGRnAK/Y9zMRgLnAb+6\ne9vwvFrAW0BzYDVwqbtvj1WGc1qcw4TVE3IM6FCxXEV6NujJ2S0KNwJR//79SU4+dCnVsmXLMr6/\n4ooruOKKK3Jss3v37izTU6dOzfj+2muv5dprry1UJilGTv4TrJwM21YCFpqu1SLoVCJyyMvAf4HM\nN6zeCUxy93+a2Z3h6UKPw12zRrV8jVQXyf7yQ3VKRCSnwBslirAA5SXO4nii3xN8uupTRi8ezaY9\nm6hXuR6D2wzm7BZnB/4cJSnlqjeCoXNg3bdQtR7UyPueABEpeu4+3cyaZ5t9IdA3/P0rwFSiUKeK\n4plHIiKSP4E3SkVZgA4nzuI4t+W5nNvy3FgeRiR3cfHQ5MSgU4hI/tVz9w0A7r7BzI4KOpCIiERX\ncT1VkqUAASpAIiJS4pjZjWY218zmbt68Oeg4IiISgeLaKOVLfgqQuxdxqrJJ77OIlDGbzKwBQPjr\nr7mt5O7Pu3tXd+9at27dXHek/z+Lht5nEYlUcW2UolKAEhMT2bp1q/5zjDF3Z+vWrSQm6kGpIlJm\nfAhcE/7+GuCDguxEdapoqE6JSEEEfo9SHtIL0D8pRAFq3Lgxa9euRZc7xF5iYiKNGzcOOoaISNSZ\n2RhC983WMbO1wP2E6tPbZnY98DMwsCD7Vp0qOqpTIhKpfDdKZtYaeJbQ/UNtzaw9cIG7/60wAWJZ\ngBISEmjRQsMsi0gwdh/czYcrPyTpYBLntDiHJtWaBB2pVItVnXL3QXks6l+Y/YLqlIhIcRbJGaUX\ngDuA/wG4+wIzewMotgVIRCQoyWnJXP3p1azYsQKAlxa9xBvnvMHRNY8OOFmpFpM6JSIiZVMk9yhV\ncvdvss1LiWYYEZHSYvb62RlNEsC+lH28s/ydABOVCapTIiISNZE0SlvMrBXgAGY2ANgQk1QiIiVc\nOct5wj7e4gNIUqaoTomISNREcundzcDzwHFmtg5YBVwZk1QiIiVc9wbdaV+3PQs2LwCgeoXqXHbs\nZQGnKvVUp0REJGry1SiZWRzQ1d1PM7PKQJy7J8U2mohIyRUfF8+oM0cx6edJ7Dywk9OanUadinWC\njlVqqU6JiEi05atRcvc0MxsKvO3ue2KcSUSkVCgfX56zW5wddIwyQXVKRESiLZJ7lL4ws9vNrImZ\n1Up/xSyZiIhIZFSnREQkaiK5R+m68NebM81zoGX04oiIiBSY6pSIiERNvhsld9cT8UREpNhSnRIR\nkWjKd6NkZgnA/wGnhGdNBf7n7skxyCUiIhIR1SkREYmmSO5RehboAjwTfnUJzxORwtqzBaY8BAvf\nDTqJSEmmOiUiIlETyT1KJ7p7h0zTk83s+2gHEilzfpoGr/4GPC00Pf0RuPnrYDOJlEyqUyKHMXr0\naNasWVPg7dO3HTFiRIG2b9asGYMHDy7w8UWKWiSNUqqZtXL3lQBm1hJIjU0skTJk/O2HmiSAzUth\n+SQ4pn9wmURKJtUpkc
NYs2YNq5YtpVGVhAJtn5CSAsDB9Ssj3nbdbl0BKyVPJI3SHcAUM/sJMKAZ\ncG1MUomUJft35Zy3Y3WRxxApBVSnRI6gUZUE/tD+qCI/7lMLfi3yY4oUViSj3k0ys2OAYwkVoKXu\nfiBmyUTKis5Xw/RHD03Hl4dOVweXR6SEUp0SEZFoyvdgDmZ2M1DR3Re4+/dAJTO7KXbRRMqIU++F\nfvdCjebQ+ES4cRqUKx90KpESR3VKRESiKZJR737r7jvSJ9x9O/Db6EeSvPy0eTc9/zGJY+4Zz5n/\nnsa+g7r0vtSo3QrqnQANOkCFqkGnkSiZsXYG571/Hqe+fSrPzX8u6DhlgeqUiIhETST3KMWZmbm7\nA5hZPKBfexehs56YzsFUB+DHTbvp99gUvrr7tIBTSaH98D68k+k2iuVfwB++hfhI/nlKcbNpzyZu\nnnQzTujf7NPfP02lhEoMPkEjPsVQmalTRxq9bOPGjQDUr18/z3U0ApmIyOFFckbpc+BtM+tvZqcC\nY4DPYhNLsluxKSmjSUq3cZcuvS8VFozNOr1jDaz9JpgsEjWvLnk1o0lKN3bZ2DzWlihRnQo7cOAA\nBw6oRoiIFEYkv7IeBtxI6KnnBkwAXoxFKMmpbrUKOebFWQBBJPqqNcg2w6Bq3r8FlpKhRbUWOebV\nrVg3gCRlSpmpU0c6E5T+nJvhw4cXRRwRkVIp32eU3D3N3Z9z9wGErvme7e4xvUnGzFab2UIzm29m\nc2N5rOKuesXy9GhRK8u863vl/CAmJdDJf4KazcMTBr1ugVotg0wkUXBJ60toWrVpxnT5+PLc3/P+\nABOVfqpTIiISTfk+o2RmU4ELwtvMBzab2TR3vy1G2dL1c/ctMT5GifDm73oyc/lm3v12LX/sfwzN\n61QJOpJEQ/XGMHRe6HK7qvXVJJUin1z8CR+v/Ji1u9dyQ7sbKBen+85iSXVKRESiKZKqXd3dd5nZ\nDcAod7/fzBbEKpjk9OWKLfx57Pds2nWAH9bv4rmrutCyrpqlUiG+HDQ7KegUEmW//+L3zFo/C4C3\nf3yb9y54jxqJNQJOVaqpTomISNRE0iiVM7MGwKXAPTHKk50DE8zMgf+5+/NFdNxiJy3NuT3cJAEs\n27Sbv360mFeu6xZwMhHJzeQ1kzOaJIDN+zZz36z7eLL/kwGmKvVUp0REitiRRuE8kvRt0++tjFQs\nR/CMpFF6kNCIQjPdfY6ZtQSWxyTVIb3cfb2ZHQV8YWZL3X16+kIzu5HQjbs0bdo0r32UCkn7U1i/\nc3+WeT9uTAoojYgcyTcbc45cuHLnygCSlCmqUyIiRWzNmjUsW76SytXqFGj7lNTQ6GTrNu2MeNs9\nu2J71XO+GyV3HwuMzTT9E3BJ+rSZ3eXuD0UznLuvD3/91czeB7oB0zMtfx54HqBr166e605KieqV\nEujYpAbzf8l4liJ9j9UIWiLF1cWtL+b1pa9nmdenSZ+A0pQNqlMiIsGoXK0O7Xv8psiPu+CrcTHd\nfyTPUTqSgVHcF2ZW2cyqpn8PnAEsiuYxSppnruzMOe3q07x2Ja7s3pTh57UJOpKI5KF1zdb8ucuf\nqZJQhfJx5enfpD9/OfEvQccq61SnREQk36I5BFO0n+pTD3jfzCCU8w13L5MPDkzXoHoiv+nYiGOO\nqsrJx9ShcgWNoFVqbF4GSz6Aqg2h7SWQkBh0IomCS4+9lEoJldh1cBdntzg76DiiOiUiIhGI5ift\nqF5SEL5kokM091nSjfh4CSNnrQLgP5OW8/Al7bjsRF3zXuL98g28fB6khgbq4PsxMOTjYDNJoSWn\nJTP408H8uP1HAF5c+CJvnPMGLWto+PcAqU6JiEi+RfPSu2j/pk4y2Xcwlde+yjqiyAszVgWURqLq\n6/8dapIAVs+A9d8Fl0ei4qv1X2U0SQB7kvcwdtnYw2whRUB1SkRE8i3fjZKZ9TrCPH0C
iCGz0Cuz\nOJX80sFy+WeY2zwpUeJy+Rla9n/EElWqUyIiEk2RfBp76nDz3P0fhY8jeUlMiOfaXi0yps3g//q2\nCjCRRE2P/4NyFQ9Nt+oPDXQ1T0nXo0EP2tZumzFdtXxVLjv2sgATlQmqUyIiEjVHvEfJzHoCJwF1\nzey2TIuqAfGxCiY53Xn2cfQ6ujY/rN/FyUfXoW2j6kFHkmho1Blu/hqWfgxV68PxFwSdSKIgPi6e\nUWeN4os1X7Dr4C7OaHYGdStpSP9YUJ0SEZFYyM9gDuWBKuF1q2aavwsYEItQkrsPvlvH7e98T3Kq\nU7l8PO/ddBLH1q8WdCyJhprNoOfNQaeQKHt50cu8tOglktOSmb52Ov87/X9BRyqtVKdERCTqjtgo\nufs0YJqZvezua460vsTOrW/Nzxiyac/BVC5+5kt+ePCsQDOJSO4Wb13M098/nTH95fov+ftXf+ee\nHvcEmKp0Ko11avTo0axZU/A/Svq2I0aMKND2zZo1Y/DgwQU+vohIaRDJ8OAVzOx5oHnm7dz91GiH\nkpwWr9uZY1zbPQdTA8kiIkf24YoPc8z7cv2XASQpU0pNnVqzZg1LVywnoVbVI6+cixRC9WHlto0R\nb5u8LalAxxQRKW0iaZTGAs8BLwL6hF7Ejq5bJce8hHiNoCVSXPVq1IvXl76eZV7rmq0DSlNmlKo6\nlVCrKrXP6F7kx9064esiP6aISHEUSaOU4u7PxiyJHFb58vH89uQWvDAz9OwkM/jXQI2MJlJc9W7c\nm9OansbEnycC0LBKQ/7e6+8Bpyr1VKdERCRqImmUPjKzm4D3gYynY7r7tqinklzdc14bujSrzhvf\n/MKdZx1PG416J1Ks/bvfv/l247ds2beFfs36kRCXEHSk0k51SkREoiaSRuma8Nc7Ms1zoGX04sjh\n/ObpWcz/ZQcA05fP5MruTfj7Re0DTiUiefnLtL/w6epPAWherTkvn/UytSvWDjhVqaY6JSIiUZPv\nB866e4tcXio+RWTnvoMZTVK6Md/8ElAaETmS7379LqNJAli9azVjlo4JMFHppzolIiLRlO9Gycwq\nmdm94RGFMLNjzOy82EWTzLbuPphjXlr2YfBEpNjYti/n1V5b928NIEnZoTolIiLRlO9GCRgFHCT0\n9HOAtcDfop5IctWybhUqlMv642pUIzGgNCJyJD0b9qRuxboZ03EWx/ktzw8wUZmgOiUiIlETSaPU\nyt0fAZIB3H0foPGpi9DkP/elWe1KVCgXR8fGNZh+R9+gI4lIHiolVGL02aO54rgrOK/lebxw+gt0\nrtc56FilneqUiIhETSSDORw0s4qEbozFzFqRaVQhib1GNSsy7Y5+QceQWFjyESx4G6o1hJNugeqN\ngk4kUbA2aS1fbfiKvcl7aVWjFd0adAs6UmmnOiUiIlETSaN0P/AZ0MTMXgd6AUNiEUqkTFn8Abw9\n+ND0ss9h6FyIj+SfpxQ3v+79lRu/uBEPfWbnP9/+h8T4RK5qc1XAyUo11SkRkSK2ceNG9iTtYcFX\n44r82Ht2bWGj74vZ/iMZ9e4L4GJCRWcM0NXdp8YmlkgZsuDtrNPbV8HaOcFkkah5bfFrGU1SureX\nvZ3H2hINqlMiIhJNkf7KuhEQH97uFDPD3d+LfiyRMqRKvVzmHVX0OSSqmlRtkmNe7UQ9Q6kIqE6J\niBSh+vXrk2o7ad/jN0V+7AVfjaN+veox238kw4OPBEYClwDnh18xHXbVzM4ysx/NbIWZ3RnLY4kE\n5uRboXrTQ9M9bobarYLLI1Ex8NiBNK7SOGM6IS6B+3reF2Ci0k91SkREoimSM0o93L1NzJJkY2bx\nwNPA6YSGeJ1jZh+6++KiylAc7TuYyoJ1O+jeQr+ZLjVqNIU/zIPJD0GznnDsGUEnkij59JJPeWPJ\nG/y862du63Ib5cuVDzpSaac6JSIiURNJozTbzNoU
YQHoBqxw958AzOxN4EKgzBag+z5YxOjZa4DQ\neLePDmzPgC45L++REmbpZ/DmZaHvvwTKV4G71wUaSaKjz1t92LY/9ODZMT+O4fOLP6d+lfoBpyrV\nVKdERCRqImmUXiFUhDYSGm7VAHf39jFJFrrO/JdM02uB7jE6VrF38GBqRpMEobFv73pvoRql0uDN\nQVmnD+6GWU9Cr1uCySNR8fR3T2c0SQBpnsagTwYx5bIpAaYq9UpNndq4cSPJu5PYOuHraOwuIsnb\nkth4sMgPK0Vg48aN7NudzFMLfi3yY6/bnUzFjRuL/LgihRFJozQSuBpYCKTFJk4WuT0kMMsQUmZ2\nI3AjQNOmTXNZvfRYsXl3jnnJqZ7LmlLy5PLP6bvX1SiVcJN/mZxj3o4DOwJIUqaoTomISNRE0ij9\n7O4fxixJTmuBzKdLGgPrM6/g7s8DzwN07dq1VHcNbRpVxww805+ySoX44AJJ9JRLhJT9Weed+69g\nskjU/K7d7/jz9D9nmdeyRsuA0pQZpaZO1a9fnz3boPYZRX8hxdYJX1O/li4RLY3q16/PwbQ9/KF9\n0Y+s+tSCXylfX3+vpGTJ96h3wFIze8PMBpnZxemvmCWDOcAxZtbCzMoDlwNFWQCLnWeu6Ez5cqEf\nWdUK5Rh3U6+AE0lU3LESLNM/xUYnQgv9bEu6M1qcwUkNT8qYrpVYi3cveDfARGWC6pSIiERNJGeU\nKhK65jvzkFwOxOT5FO6eYmZDgc8JPRNjpLv/EItjlRRnt2vA0o27mLdmO+d3aMjR9aoGHUmioUIV\nuOo9mP3f0DDhp/816EQSJf/s/U8em/sYOw/s5Mb2NwYdpyxQnRIRkajJd6Pk7tfGMkgexxwPjC/q\n4xZX5z81g4XrdgEwc8VW5q3ZziMDOgScSgrt21fhw6GHpn94D+5ck/f6UiLsT9nPGe+cwf7U0GWV\n09ZO4+n+T3NK41MCTlZ6qU6JiARjz64tLPhqXIG23b9nJwCJlSN/cOyeXVsghg+cPWKjZGZ/cfdH\nzOwpst2kCuDuuuO8COzYezCjSUo37rv1apRKg2mPZJ3evwMWvA3tLw0mj0TFKz+8ktEkpXvqu6fU\nKMWA6pSISHCaNWtWqO3XrAkNdNSoIA1PveqFPv7h5OeM0pLw17kxSyEiIlJwqlMiIgEZPHhwobYf\nMWIEAMOHD49GnKg64mAO7v5R+Nu97v5K5hewN7bxJF2NSuVp3zhrp31R50YBpZGo6jMs63TFmjqb\nVApcc8I1JMYnZpl3Syed2IgF1SkREYmFSAZzuAsYm495EiMfDj2ZJyYuY96a7ZzXvgGXnahncpQK\nna+C6o1g9tNQoymc/mDQiSQKEsslMmHABP41919sP7CdG9vfSPu6sXruqYSpTomISNTk5x6ls4Fz\ngEZm9mSmRdWAlFgFk9zdelrroCNILLTqF3pJqbJk6xJW7lxJ0sEkvt30rRqlGFGdEhGRWMjPGaX1\nhK77vgCYl2l+EvCnWIQSESnptuzbwh8m/4GDaQcBeHze49SrXI+zW5wdcLJSSXVKRESi7oiNkrt/\nD3xvZm+4e3IRZBIRKfHmbpyb0SSl+3L9l2qUYkB1SkREYiGSe5S6mdkDQLPwdga4u7eMRTARkZKs\ndc2cl8keW/PYAJKUKapTIiISNZE0Si8RuoRhHpAamzgiIqVDyxotub3r7Twz/xn2p+7njGZncOmx\nGs0wxlSnREQkaiJplHa6+6cxSyL5smnXfpZuTKJT0xpUS0wIOo5Ey95tMOlBaNgRugwJOo1EyTUn\nXMPlx13OwdSDVC1fNeg4ZYHqlIiIRE0kjdIUM3sUeA84kD7T3b+NeirJ1dtzfuHu9xeSkuZUqVCO\nF6/pSo+WtYOOJYW16H14Z0jo+3nApBHwl5VBJpIoqhBfgQrxFYKOUVaoTomISNRE0ih1D3/tmmme\nA6dGL47kJTk1
jb+PX0JKmgOw+0AKD3+2lPdv6hVwMim0j2/NOr13C3z3BnS6Ipg8IiWX6pTIEazb\nncxTC34t0LZb9oVG269TMZKPj4eO26JARxUJTr7/pru7HvISoAMpaezan3Uwp81JB/JYW0qU5H05\n521aVPQ5REo41SmRw2vWrFmhtk9eswaA8g0j30+LKBxfpKjlu1Eys3rAP4CG7n62mbUBerr7SzFL\nJxmqVCjH6cfXY8LiTRnzLu7UKMBEEjVH94cfx2ed1+/uYLKIlGCqUyKHN3jw4EJtP2LECACGDx8e\njTgixV5cBOu+DHwONAxPLwNuzXNtibonLu/In05rzRlt6jHiwhO49bScww9LCTRoDBx3PpRLhEq1\n4YqxUKFK0KlESqKXUZ0SEZEoieQi0zru/raZ3QXg7ilmpuFXi9CGHfsZOesnkvan8MP6nQw6sTFx\ncZFfJyzFUKcrIT4BqjWE+u2CTiNR8kvSL4z+YTRJyUlcdPRFdG/Q/cgbSWGoTomISNRE8il7j5nV\nJnRjLGbWA9gZk1SSq9P/PY3wWA6s27GfTiMmsvCvZwUbSgpvycfw1pWHppd9Bjd/A3HxwWWSQtub\nvJfBnw5my74tAHy66lNGnjmSLvW6BJysVCtVdSp5WxJbJ3xdoG1TkvYCUK5qpQIdl1r1C3RcEZHS\nJJJG6TbgQ6CVmc0C6gIDYpJKcli0bkdGk5Qu6YB+UVoqfD8m6/TWFfDLN9CsZzB5JCpmr5+d0SQB\npHkaH//0sRql2Co1daqwN72vSQrddN+sIA1Prfq66V5EhMgapVbA2UAT4BJCw7Dquq8iUr96YtAR\nJFaqHJW/eVKi1K6Y8xlndSrWCSBJmVJq6pRuuhcRCV4kgzkMd/ddQE3gNOB54NlYhDKzB8xsnZnN\nD7/OicVxSpI6VRJpUrNilnn9j6sbUBqJql5/hGqZRjDs9juo3Sq4PBIVHY/qyNnNz86Ybl6tOZcf\ne3mAicoE1SkREYmaSH7Tln6d17nAc+7+gZk9EP1IGf7t7o/FcP8lzoxhpzLm6zW88+06/nLGsXRv\nlfM31lIC1WwOt3wHq2eGBnM46vigE0mUPNLnEa46/iq2H9hOr0a9KKfBV2JNdUpERKImkjNK68zs\nf8ClwHgzqxDh9lJIc1Zv4+mpK5m3Zjsjxi/m5617g44k0VKuQuh5SmqSSpXXFr/GjRNv5JYpt3DP\nzHs4mHow6EilneqUiIhETSQF5FJCz6c4y913ALWAO2KSKmSomS0ws5FmVjOGxykR0tKcP701n7Xb\n9wGwaN0uHvjoh4BTiUheftr5Ew/PeZg9yXtI8zTGrxrP2GVjg45V2qlOiYhI1OT7OhB33wu8l2l6\nA7ChoAc2s4lAbsPx3EPomvIRhIZ4HQE8DlyXyz5uBG4EaNq0aUGjlAhJ+1MymqR0i9fvCiiNiBzJ\nsm3Lcsxbum1pAEnKDtUpERGJpsAumHf30/Kznpm9AHycxz6eJ3SzLl27dvXc1iktqldKoF2j6ixc\nd+iRIL2O1ghaIsVV53qdSYhLIDktOWNejwY9AkwkkVKdEhEp24rltdtm1iDT5EXAoqCyFCfPXNmZ\n/scdRf1qiQzo0pj7L2gTdCQRycNRlY7iyVOf5ITaJ9CkahNu7Xwr57Y8N+hYEiWqUyIipV9xHYLp\nETPrSOiShtXA74KNUzw0qVWJl4acGHQMiYXtq2HJR1C1AbS5EOITgk4kUdC2dluOrXUsO/fvpGdD\nPUC4lFGdEhEp5Yplo+TuVwedQaTIrJsHo86FlPA9aPPfgKvfO/w2UuwdTDnI6e+czv7U/QBM+mUS\nz532HL0a9Qo4mUSD6pSISOlXLBslkTLlq+cONUkAKyfBhu+hQYfgMkmhvbz45YwmKd1/vv2PGiUR\nESlTRo8ezZo1a/Jcnr5sxIgRea7TrFkzBg8eHPVsR6JGSSRonpa/eVKipKXl/B
m6615+ERGRzCpU\nqBB0hDypURIJWvffh+5PSj0Qmm7eGxp2CjaTFNqQtkN4cdGLHEj/uQJDOw0NMJGIiEjRC+JMULSo\nURIJWpMT4f9mweIPoFpDOOHioBNJFCSWS+Sziz/j8XmPs33/dn7X/nd0qqcGWEREpKRQo1SCrNux\nj799vJgf1u/i5GPqcM85x1O5gn6EpUKdY+CU24NOIVFWp1IdHur9UNAxREREpAD0KbsEufn1b5n/\nyw4A3vj6Z9LSnH9e0j7gVCIiUtRK8s3RIiIlhRqlEmLn3uSMJind1B83B5RGRESKs+J8c7SISEmh\nRqmEqJpYjkY1KrJux6FhpI+tXzXARCIiEhSdCRIRib24oANI/sTFGY8ObE/9aokAHFuvKvef3ybg\nVCIiIiIipZPOKJUgJ7Wqw8xh/di25yBHhRsmERERERGJPp1RKmHKxcepSRIRERERiTE1SiIiIiIi\nItmoURIREREREclGjZKIiIiIiEg2apRERERERESy0ah3IsXB8i9g4Vio2gB63ARV6wWdSERERKRM\nU6MkErSl4+HNQYemfxwPN30FcfHBZRIREREp43TpnUjQ5r+edXrLMlg7J5gsIiIiIgKoURIJXuU6\nOedVymWeiIiIiBSZwGI+qyoAABA7SURBVBolMxtoZj+YWZqZdc227C4zW2FmP5rZmUFlFCkSvf4Y\nujcpXdfroc7RweUREUB1SkSkrAvyHqVFwMXA/zLPNLM2wOXACUBDYKKZtXb31KKPKFIEarWEW74L\nDehQoyk07Bh0IhEJUZ0SESnDAjuj5O5L3P3H/2/v3oPjKs87jn9/tqgasEHCuIT7xXZCQi5cnBBQ\nsJ3ETZpOJkbGxFASF5PgumQgNAPMdGhcau5Dkk4HOs1ASlzuBWxxMQ13kFwBvoBlbBOcBIyDCqHG\nyBCDbZD99I/zqqzXWhWrWp3V7u8z886+5z23Z3V29ez7nnN2e5k1Bbg9IrZGxFrgt8DnBzc6s0G0\ndRPM/x7cMQNuPhmeuyPviMwM5ykzs1pXid96dwDwdMF0Z2ozq07t/wQvLMzq774B93wfDv8SjBid\nb1xmVorzlFWlG2+8kXXr1pWc3zPvkksuKbnMIYccwowZMwY8NrM8lLWjJOkR4KO9zLooIu4ptVov\nbVFi+7OAWQAHH3xwv2I0y91rK3ac3vYe/PfzMGJiPvGY1RDnKbMPr76+Pu8QzAZVWTtKETG5H6t1\nAgcVTB8IvFpi+9cB1wGMHz++1yRlVvEOnwi/efCD6fo94YBj8ovHrIY4T5l9wGeCzHZUiV8Pfi9w\nqqR6SYcB44AlOcdkVj7HzU7ffLc/HHAsnHYb1I/MOyozK815ysysBuR2j5KkZuAaYDRwv6SOiPha\nRKyWdAfwPNANfN/fJGRVbdhw+NO5WTGziuE8ZWZW23LrKEVEC9BSYt5lwGWDG5GZmdkHnKfMzGpb\nJV56Z2ZmZmZmlit3lMzMzMzMzIq4o2RmZmZmZlbEHSUzMzMzM8tFV1cXc+fOZePGjXmHshN3lMzM\nymTp75cy84GZTLt3Gre9cFve4ZiZmVWclpYW1qxZw4IFC/IOZSfuKJmZlcGGzRs4+5GzWfb6MtZ0\nreHyxZfz8LqH8w7LzMysYnR1ddHa2kpE0NbWVnFnldxRMjMrg6WvL2XLti07tLV1tuUUjZmZWeVp\naWkhIgDYvn17xZ1VckfJzKwMxuw1Zqe2sQ1jc4jEzMysMrW3t9Pd3Q1Ad3c37e3tOUe0I3eUzMzK\nYFzjOM45+hzqh9cDMOmgSUz/+PScozIzM6scTU1N1NXVAVBXV0dTU1POEe2oLu8AzMyq1azPzOL0\nT5zOlu4tjPrIqLzDMTMzqyjNzc20trYCMGzYMKZOnZpzRDvyGSUzszLaY7c93EkyMzPrRWNjIxMn\nTkQSEyZMoKGhIe+QduAzSmZmZmZmlovm5m
Y6Ozsr7mwSuKNkZmZmZmY5aWxsZM6cOXmH0Stfemdm\nZmZmZlbEHSUzMzMzM7Mi7iiZmZmZmZkVcUfJzMzMzMysiDtKZmZmZmZmRdxRMjMzMzMzK5JbR0nS\nKZJWS9ouaXxB+6GSNkvqSOVnecVoZmZmZma1Kc8zSquAqUBbL/NejIijUpk9yHFVvC3vb8s7BDOz\nqjeUB/S6urqYO3cuGzduzDsUM7MhK7eOUkT8KiLW5LX/oajjlY18+SdPcMSPHmDavzzJqxs35x2S\nmVk1G7IDei0tLaxZs4YFCxbkHYqZ2ZBVqfcoHSZpuaRWSSfmHUwliAh+cPtyXlr/DgDL1nUx557V\nOUdlZla9huqAXldXF62trUQEbW1tPqtkZtZPZe0oSXpE0qpeypQ+VnsNODgijgZ+CNwqac8S258l\naZmkZevXry/HU6gYb2/uZt2Gd3doW/lfTn5mZjmp2AG9lpYWIgKA7du3+6ySmVk/lbWjFBGTI+JT\nvZR7+lhna0RsSPVngBeBj5VY9rqIGB8R40ePHl2eJ1Eh9tp9Nz6x3479xeMPH5VTNGZm1aEaB/Ta\n29vp7u4GoLu7m/b29kHZr5lZtam4S+8kjZY0PNUPB8YBL+UbVWX45784mqaxo2jYfTe+8Zn9uPib\nR+YdkpnZkFaNA3pNTU3U1dUBUFdXR1NT06Ds18ys2tTltWNJzcA1wGjgfkkdEfE1YAIwV1I3sA2Y\nHRFv5hVnJTl89Ahu+d4X8g7DzKymSRoNvBkR2ypxQK+5uZnW1lYAhg0bxtSpU3OOyMxsaMrzW+9a\nIuLAiKiPiH1TJ4mImB8RR0bEZyPimIi4L68YzcysdklqltQJHE82oPdgmjUBeE7SCuAuKmxAr7Gx\nkYkTJyKJCRMm0NDQkHdIZmZDUm5nlMzMzCpZRLQALb20zwfmD35EH15zczOdnZ0+m2Rm9v/gjpKZ\nmVmVaWxsZM6cOXmHYWY2pFXclzmYmZmZmZnlzR0lMzMzMzOzIu4omZmZmZmZFVHPr3cPdZLWA+vy\njmOQ7AO8kXcQVhY+ttWplo7rIRFR3b8A3k81lqegtl73tcTHtTrV0nH90HmqajpKtUTSsogYn3cc\nNvB8bKuTj6vVIr/uq5OPa3Xyce2dL70zMzMzMzMr4o6SmZmZmZlZEXeUhqbr8g7AysbHtjr5uFot\n8uu+Ovm4Vicf1174HiUzMzMzM7MiPqNkZmZmZmZWxB2lASKpQdLZ/Vx3nqRpAxTHE5L8rSVVRtIk\nSSfkHYcNnFLvVUlnSLo2j5isujlPWTk5T1Uf5yl3lAZSA9CvBGTWF0l1wCTACahKSBqedwxWk5yn\nrCycp6qP81TGHaWBcyUwRlKHpKslXSBpqaTnJP1Dz0KSZqS2FZJuKlh/gqQnJb3UM2qXRmeekHSX\npBck3SJJad5XJC2XtFLSDZLqiwOSdFqav0rSVQXt35X067Tt6yVdK2mkpLWSdkvL7Cnp5Z5p+/Ak\n7SHp/nSMV0manv6WV0laksrYtOwhkh5Nr4lHJR2c2udJ+qmkx4F/B2YDf5NeXydKOiVte4Wkthyf\nbs2RdKGkc1P9HyU9lupfkXRzH++7TZLmSloMHF+0zZnpPdkKNA3m87Ga4jxlgPNUtXOeGkAR4TIA\nBTgUWJXqXyX79hCRdUYXAhOAI4E1wD5pub3T4zzgzrTsJ4HfpvZJwFvAgWneU8AXgT8GXgE+lpa7\nETgv1Z8AxgP7A78DRgN1wGPASan9ZWBvYDdgEXBtWvcXwEmpPgv4Sd5/16FYgJOB6wum90p/84vS\n9AxgYarfB/xlqp8J3F3wmlgIDE/TFwPnF2xzJXBAqjfk/ZxrqQBfAO5M9UXAkvRe+vtUdnrfpWUD\n+FbBdnreq/sVrPNHQHvPe9LFZSCL85RLwWvBeaqKi/PUwBWfUSqPr6ayHHgWOAIYB3wZuCsi3gCI\niDcL1r
k7IrZHxPPAvgXtSyKiMyK2Ax1kie7jwNqI+HVa5t/IElyhzwFPRMT6iOgGbknLfB5ojYg3\nI+J9ssTX4+fAzFSfSZaQbNetBCankbkTI+Kt1H5bwWPPSM3xwK2pfhPZB4wed0bEthL7aAfmSToL\n8OnxwfUMcKykkcBWsg+G44ETgY30/r4D2AbM72V7xxWs8x7ZyKxZuTlP1TbnqermPDVA3FEqDwFX\nRMRRqYyNiH9N7aW+j31r0fq9tW8j6/0Xzu8rhl1pJyLagUMlTSQbIVr1IfZjRdIHg2PJEtEVkub0\nzCpcrNTqBfV3+tjHbODvgIOADkmj+h+x7Yr0we1lsg9pT5KN1n0JGEM24lbKlj4+UPh3GmywOU/V\nMOep6uY8NXDcURo4fwBGpvqDwJmSRgBIOkDSnwCPAt/q+Wchae9+7usFskQxNk1/B2gtWmYxMFHS\nPspuyDstLbMktTcqu/ny5KL1biQbSfIoXT9J2h94NyJuBn4MHJNmTS94fCrVnwROTfXTgf8ssdnC\n1xeSxkTE4oiYA7xBlohs8LQB56fHRWTX5ncAT9P7+64vi4FJkkaley1OKV/YVuOcpwxwnqoRzlMD\noC7vAKpFRGyQ1C5pFfBLstPUT6V7WjcB346I1ZIuA1olbSO75OGMfuxri6SZwJ0piSwFfla0zGuS\n/hZ4nGx07j8i4h4ASZeTvehfBZ4nu768xy3ApXxw+t123aeBqyVtB94H/hq4C6hPN0gOI/vHBHAu\ncIOkC4D1fHBJSbH7gLskTQHOIbthdhzZsX0UWFGuJ2O9WgRcBDwVEe9I2gIs6ut9V0pa52KyDyWv\nkV0G5ctUbMA5T1kB56nq5zw1ABRRk2fSapqkERGxKSWvFuCGiGhJ86YBUyLiO7kGWWUkvQyM77nu\n38zMSnOeGnzOU2Y78xml2nSxpMlk30r0EHA3gKRrgK8Df55jbGZmZs5TZpY7n1EyMzMzMzMr4i9z\nMDMzMzMzK+KOkpmZmZmZWRF3lMzMzMzMzIq4o2RW4SSdJOmTecdhZmbWG+cpq1buKJlVvpOAsiag\n9KNzZmZm/eE8ZVXJHSWrWZIOlfQrSddLWi3pIUkfkTRG0gOSnpG0SNIRkoZLekmZBknbJU1I21kk\naaykiZI6UlkuaWQf+75Q0kpJKyRdmdrOkrQ0tc2XtLukE4Bvkv0wYEeKbaf40vpjJD2dtjFX0qbU\nLklXS1qV9jk9tU+S9LikW4GVki6R9IOCGC+TdG7ZDoCZmfXJecp5ynIWES4uNVmAQ4Fu4Kg0fQfw\nbbJfEB+X2o4DHkv1B4AjgW+Q/cr8RUA9sDbNvw9oSvURQF2J/X4deBLYPU3vnR5HFSxzKXBOqs8D\nphXMKxXfQuC0VJ8NbEr1k4GHyX5Fe1/gd8B+wCTgHeCwgr/Hs6k+DHixMCYXFxcXl8EtzlPOUy75\nFv/grNW6tRHRkerPkP0TPgG4U1LPMvXpcREwATgMuAI4C2glS0YA7cBPJd0CLIiIzhL7nAz8IiLe\nBYiIN1P7pyRdCjSQJbAHi1eUNKKP+I4nu/wB4Fbgx6n+ReC2iNgGvC6pFfgc8DawJCLWpjhelrRB\n0tFkiWp5RGwo8RzMzGxwOE85T1lO3FGyWre1oL6N7B/vxog4qpdlF5GNgO0PzAEuIBvtagOIiCsl\n3U/2i/FPS5ocES/0sh0Bvf3S8zzgpIhYIemMtO1iw/qIrxT1Me+doumfA2cAHwVu2IV9mJlZeThP\n7ch5ygaN71Ey29HbwFpJp8D/Xjf92TRvMdko2faI2AJ0AH9FlpiQNCYiVkbEVcAy4IgS+3gIOFPS\n7mm9vVP7SOA1SbsBpxcs/4c0j4joK76nyS5fADi1YP02YHq6fn002WjjkhKxtQB/RjaSt9NIoZmZ\n5c55ynnKBok7SmY7Ox34rqQVwGpgCkBEbAVeIftHD1niGQmsTNPnpRtR
VwCbgV/2tvGIeAC4F1gm\nqQM4P836EVmSexgoHOG7Hbgg3Xg7plR8wHnADyUtIbu2+63U3gI8B6wAHgMujIjfl4jtPeBx4I50\nCYSZmVUe5ynnKRsEiujtzKqZDTVp5G9zRISkU8lumJ3yf61XtI1hwLPAKRHxm3LEaWZmtcl5yoYa\n36NkVj2OBa5VdvfsRuDMXVlZ2Y8FLgRanHzMzKwMnKdsSPEZJbMykfRp4Kai5q0RcVwe8ZiZmRVy\nnjLrmztKZmZmZmZmRfxlDmZmZmZmZkXcUTIzMzMzMyvijpKZmZmZmVkRd5TMzMzMzMyKuKNkZmZm\nZmZWxB0lMzMzMzOzIv8DrlIfiVZmjS8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1008x288 with 2 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Side-by-side view of per-category sentiment scores:\n",
    "# individual articles (left) and their quartile summary (right).\n",
    "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))\n",
    "sp = sns.stripplot(x='news_category', y=\"sentiment_score\",\n",
    "                   hue='news_category', data=df, ax=ax1)\n",
    "# hue repeats the x variable purely for colouring, so dodging is switched off;\n",
    "# with dodging on, each box is squeezed into a fraction of its slot and drawn\n",
    "# off-centre from its tick.\n",
    "bp = sns.boxplot(x='news_category', y=\"sentiment_score\",\n",
    "                 hue='news_category', data=df, palette=\"Set2\",\n",
    "                 dodge=False, ax=ax2)\n",
    "t = f.suptitle('Visualizing News Sentiment', fontsize=14)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEYCAYAAABPzsEfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XtYVXW+P/D3BxAUwQtCXkDFCxvY\nQKgQHjUmtZrBmUlnUkYzM3PM1GdqbCabRj2dqSZHT3X6jZWjZuqYZp608tZ4pquStwYFEpBLGuZd\nFMU7suHz+2Ot7RCBLpC9N+j79Tz7Ya2111rf795uefNda6/PElUFERHR9Xh5ugNERNQ0MDCIiMgS\nBgYREVnCwCAiIksYGEREZAkDg4iILGFgEBGRJQwMIiKyhIFBRESW+Hi6A1akpKTopk2bPN0NIrox\n4ukO0I1pEiOMkydPeroLRES3vCYRGERE5HkMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhE\nRGQJA4OIiCxhYBARkSVNojQI0a1qVVqPBt/nyOR9Db5PujVwhEFERJYwMIiIyBIGBhERWcLAICIi\nSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkicsCQ0QW\ni8gJEcmusuwlEckTka9F5AMRaeOq9omIqGG5coSxFEBKtWUfA4hV1dsBFAD4owvbJyKiBuSywFDV\nLQBKqi37p6o6zNkdAMJc1T4RETUsT57DGA/gH7U9KSITRSRdRNKLi4vd2C0iIqqJRwJDRGYAcABY\nUds6qrpQVRNVNTEkJMR9nSMiohq5/RatIvIwgJ8DuFtV1d3tExFR/bg1MEQkBcAfANylqhfd2TYR\nEd0YV36tdiWA7QAiReSQiPwawOsAAgF8LCKZIjLfVe0TEVHDctkIQ1UfqGHxW65qj4iIXItXehMR\nkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSUMDCIisoSBQURE\nlri9vHlTsiqtR4Pvc2TyvgbfJxGRO3CEQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxh\nYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGSJywJDRBaLyAkRya6yLEhEPhaR\nQvNnW1e1T0REDcuVI4ylAFKqLXsGwKeqGgHgU3OeiIiaAJcFhqpuAVBSbfEwAH83p/8O4Beuap+I\niBqWu2+g1F5VjwKAqh4VkdtqW1FEJgKYCABdunS55k4PJic3ZB//bZZrdnurcMUNqADehIrIUxrt\nSW9VXaiqiaqaGBIS4unuEBHd8twdGMdFpCMAmD9PuLl9IiKqJ3cHxjoAD5vTDwNY6+b2iYionlz5\ntdqVALYDiBSRQyLyawCzAdwrIoUA7jXniYioCXDZSW9VfaCWp+52VZtEROQ6jfakNxERNS4MDCIi\nsoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSUMDCIisoSBQURElrj7BkrU\nSLnkJlS8ARXRTYUjDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSUM\nDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLPBIYIvKkiOSISLaIrBSR5p7oBxERWef2wBCRUABP\nAEhU1VgA3gBGubsfRERUN546JOUDoIWI+ADwB3DEQ/0gIiKL3H4DJVU9LCIvA/gOwCUA/1TVf1Zf\nT0QmApgIAF26dHFvJ4nqyCU3oAJ4EypqVDxxSKotgGEAugHoBKCliIypvp6qLlTVRFVNDAkJcXc3\niYioGk/covUeAN+qajEAiMj7APoDWO
6BvhCRG+3ates2Hx+fRQBiwW9pNmaVALIdDseEhISEE86F\nngiM7wD8h4j4wzgkdTeAdA/0g4jczMfHZ1GHDh2iQ0JCTnt5eamn+0M1q6yslOLiYvuxY8cWARjq\nXO72hFfVnQBWA9gNYI/Zh4Xu7gcReURsSEjIWYZF4+bl5aUhISGlMEaCV3lihAFV/S8A/+WJtonI\no7wYFk2D+e/0vUEFjyESEZElDAwiavK2bdvWYtWqVa2d8ytWrGg9ffr0Dq5sc8OGDYEff/xxy4bc\n5zPPPOPSPt8oBgYRNXnp6en+GzduvBoYDz74YOmsWbOOubLNzz77LDAtLS2gIfc5d+7cjg25v5qU\nl5fXe1tLgSEin1pZRkRUV2fPnvUaOHBgz8jISHtERETMm2++2TYtLc3/jjvuiIyJiYm+8847Iw4c\nONAMAJKSkiInT54cGhcXFx0eHh67adOmgMuXL8tf/vKXTuvXr28bFRVlf/PNN9vOnTu33dixY7sA\nwPDhw8MffPDBLn379rWFhYXFbdy4MSA1NTW8e/fuMcOHDw939uP9999v1atXryi73R49ZMiQ7qWl\npV4AEBoaGvfkk092stvt0TabzZ6RkdE8Pz/fd9myZSHz589vHxUVZd+0aVONwXHw4EGfe++9t0dk\nZKQ9MjLS7hyR3HPPPT1iYmKie/bsGfPyyy8HA8CUKVNCy8rKvKKiouxDhw7tBgDz5s0LiouLi46K\nirKPHj26q8PhAAC8+uqrweHh4bFJSUmRo0aN6up8rQUFBb79+vWz2Ww2e79+/WyFhYW+zvdgwoQJ\nYX379rVNmjSpc9euXWOPHDniAwAVFRXo0qVL7NGjR697TvuagSEizUUkCECwiLQVkSDzEQ7jojsi\nohvy/vvvt+rQoUN5fn5+bmFhYc79999/9oknnuiydu3afTk5OXsffvjhk0899VSoc32HwyF79uzZ\nO2fOnIPPP/98p+bNm+sf//jHI/fdd9/pvLy83EcfffR09TZKS0t9tm/fXjB79uyDI0eOjJg2bdrx\nwsLCnLy8vBbbtm1rcfToUZ9Zs2Z13LJlS0Fubu7ePn36XHzhhRfaO7cPDg525Obm7h0/fnzx7Nmz\n20dGRl4ZO3Zs8aRJk47n5eXlpqSknK/ptU2aNKlLcnLyufz8/NycnJzcPn36XAaAFStWFOXk5OzN\nzMzMXbBgQftjx455z5s377Cfn19lXl5e7rp1677dvXt389WrVwelp6fn5eXl5Xp5een8+fPbFRUV\nNXv55Zc77ty5c29aWlpBYWFh86rtjR49+lRBQUHuyJEjT02ePLmz87l9+/Y137p1a8Fbb711cMSI\nEacWLVoUBABr165tFR0dfaljx46O6/1bXS9RHgMwFUY47AIg5vKzAN643s6JiK6nT58+l2bMmNF5\n8uTJocOGDStt166do7CwsMXgwYNtAFBZWYmQkJCrx1FSU1NPA0D//v0vTJs2zddKGz/72c/OeHl5\noU+fPhfbtWtXnpSUdAkAbDbbpX379vkdOHDAd9++fc2TkpKiAKC8vFwSEhKuhsDo0aNPA0BSUtLF\ndevWtbX62rZt2xa4evXqbwHAx8cH7dq1qwCAOXPmtN+4cWMbADh27FiznJyc5h06dLhQddtNmzYF\nZmdn+8fHx0cDwOXLl71uu+02R1paWsu+ffuea9++fQUA/PKXvzxdUFDQHAAyMjJa/uMf/9gHAJMn\nTy557rnnwpz7u//++0/7+Bi/8idPnnxy6NChPZ999tkTixcvDh43btxJK6/nmoGhqn8F8FcReVxV\nX7OyQyKiurj99tvLdu/enbtmzZrWM2bMCB04cODZnj17XsrMzMyraf3mzZsrYPwCrqiokJrWqW0b\nb29v+Pr6Xv1ar5eXFxwOh3h7e+udd955dv369d9ep011OByW2qzNhg0bAjdv3hyYnp6eFxgYWJmU\nlB
R56dKlHxztUVVJTU099cYbbxyuunzZsmVt6tNuQEBApXO6Z8+e5cHBwY5169YFZmRktPzwww/3\nW9mHpXMYqvqaiPQXkdEiMtb5qE+niYiqKioqahYYGFg5ZcqUkqlTpx5PT09vWVJS4vPJJ5+0BICy\nsjJJT0+/5j1zWrVqVXH+/Pl6f4ln4MCBF9LT0wOys7P9AODcuXNeX3/9td+1tgkMDKw4d+6c97XW\nGTBgwLmXXnopBAAcDgdKSkq8zpw54926deuKwMDAyoyMjOZZWVlXv2nl4+OjZWVlAgApKSlnN2zY\n0Pbw4cM+AHD8+HHvgoIC3+Tk5As7d+4MLC4u9i4vL8fatWuvjnh69+59YdGiRW0BYMGCBUGJiYk1\nHioDgPHjxxdPmDCh29ChQ0ucI4/rsXrS+20ALwO4E8Ad5iPRUgtERNewa9euFr169YqOioqyz5kz\np+MLL7xw5N133933zDPPhEVGRtpjYmLsmzdvvua3kYYMGXKuoKCghfOkd1370KlTJ8eCBQuKRo0a\n1d1ms9kTEhKi9uzZc82QGj58+JmNGze2udZJ77/97W/fbd68OdBms9ljY2Ptu3fvbjF8+PBSh8Mh\nNpvNPn369E7x8fFXD0U9+OCDxdHR0fahQ4d2S0hIuDxz5szDd999t81ms9kHDx5sO3jwYLNu3bqV\nP/nkk0fvuOOO6AEDBkTabLZLrVu3rnC29/bbbwfbbDb7ypUr282bN+9gbf1/4IEHSi9evOg9ceLE\nU1bfJ1G9/kWXIrIXgF2trOwCiYmJmp5ee7kpV5WW3jar4W/TMTJ5X4PvsyG44j10xfsHNM73kJ9B\nSyQrK6soPj7e0vFyql1paalX69atK8vLy/GTn/yk57hx406OHTv2TF32sWXLFv8nn3yy865du/Jr\nWycrKys4Pj4+3DlvtTRINoAOAI7WpUNERNTwpk2b1mnLli2tysrK5K677jo7ZsyYOoXF9OnTOyxd\nujRkyZIlNZ6zqY3VwAgGkCsiXwEocy5U1aG1b0JEdGv4wx/+0GHt2rVBVZcNGzasZM6cOS65eHDh\nwoWHbmT7WbNmHavPhY1WA+NPdd0xEdGtYs6cOcdcFQ6NiaXAUNXNru4IERE1bpYCQ0TOAXCe8PYF\n0AzABVVt5aqOERFR42J1hBFYdV5EfgEgySU9IiKiRqleF7qo6ocABjdwX4iIblknT570nj17dohz\nvqioqFlKSkp3T/apOquHpO6vMusF46I93jWLiDziYHJyQkPur3Na2q6G3F99nDp1yvutt9667Zln\nnikGgPDw8PJNmzZZKtnhLlZHGPdVefwEwDkAw1zVKSKixiY/P9+3e/fuMaNGjeras2fPmAEDBkSc\nP39ecnJy/JKTkyNiYmKiExISIjMyMpoDQE5Ojl98fHxUbGxs9NSpUzv5+/v3BoyL7vr162dzlktf\nvnx5GwD4/e9/H3bw4EG/qKgo+2OPPRaWn5/vGxEREQMAt99+e1TV8ihJSUmRaWlp/mfPnvVKTU0N\nj42NjY6Ojr66L1exWkvqkSqPR1X1RVU94cqOERE1Nt99913zJ5544sQ333yT07p164ply5a1nTBh\nQtd58+Z9l5OTs/ell146NHny5C4A8Jvf/KbzlClTTmRnZ+/t1KnT1Wq7/v7+lRs3bvwmNzd37+bN\nmwumT58eVllZiVdeeeVQ586dy/Ly8nIXLFjwvesshg8fXrJixYogADhw4ECzEydONEtOTr44ffr0\njoMGDTqbnZ29Ny0tLX/mzJlhZ8+eddmN8azWkgoTkQ9E5ISIHBeRNSISdv0tiYhuHqGhoWX9+/e/\nBAC9e/e+WFRU5JeRkRGQmpraIyoqyj5lypSuJ06caAYAGRkZAePHjy8BgAkTJlyt11RZWSlTp04N\ns9ls9kGDBtlOnDjhe+jQoWueHhg7duxpZ1n1ZcuWtb3vvvtOA8AX
X3zR6tVXX+0YFRVlv/POOyPL\nysrkm2++sVTyvT6sXri3BMA7AFLN+THmsntd0Skiosaoaml0b29vPX78uE9gYKAjLy8v1+o+FixY\nEHTq1CmfPXv27PXz89PQ0NC4msqbV9WtW7fyNm3aOHbu3Nni/fffD1qwYMEBAFBVrF69+pv4+Piy\na23fUKwOXUJUdYmqOszHUgAh19uIiOhm1qpVq8qwsLArixcvbgsYN3vavn17CwDo1avX+aVLl7YF\ngMWLF18tG1JaWuodHBxc7ufnp+vXrw88cuSILwC0bt264sKFC7X+Th4xYkTJrFmzOpw7d87beQOo\nQYMGnX3llVfaV1Yat7rYunVrC5e9WFgPjJMiMkZEvM3HGACWS+ISEd2sVq5cuX/JkiXBznuSr1mz\npg0AvPbaawdfe+219nFxcdFHjx5tFhAQUAEAEyZMKMnKymoZGxsbvXz58qBu3bpdBoAOHTpUJCQk\nnI+IiIh57LHHfnDIf8yYMac3btwYNGzYsBLnstmzZx9xOBwSFRVlj4iIiJk5c2Zo9e0aktVDUuMB\nvA7gVRhfp90G4JH6NioibQAsAhBr7m+8qm6v7/6I6Nbiia/BRkZGXiksLMxxzj///PPHndNpaWmF\n1dcPDw8vz8zMzPPy8sLChQvbxsXFXQCAjh07Omq7m2D1O/5Vba9z584Oh8PxvdcdEBCg77zzzoH6\nv6q6sRoYLwB4WFVPA4CIBMG4odL4erb7VwCbVHWEiPgC8K/nfoiIGqWtW7f6//a3v+2iqmjVqlXF\n0qVLizzdpxtlNTBud4YFAKhqiYj0rk+DItIKwI8AjDP3dQXAlfrsi4iosUpJSTmfn59v+WR4U2D1\nHIaXiFy97aE5wrAaNtV1B1AMYImIZIjIIhFpeb2NiIjIs6z+0n8FwDYRWQ3jnMOvALx4A232AfC4\nqu4Ukb8CeAbAf1ZdSUQmApgIAF26dKlnU3QzmvHJDd07pkYv3sPLioiux+qV3ssADAdwHMbo4H5V\nfbuebR4CcEhVd5rzq2EESPU2F6pqoqomhoTwG7xERJ5m+bCSquYCuOHjcap6TEQOikikquYDuLsh\n9ktERK7lspoj1/E4gBUi8jWAXgBmeagfRERu89///d8hr7/+ejsAmDt3bruioqJmzudGjhzZddeu\nXc1r39rz6nvi+oaoaiaMEulERHW2Kq1Hg5Y3H5m8zy3XdTz99NPFzunly5cH9+rV61J4eHg5AKxa\ntcpt11PUl6dGGERETUp+fr5vt27dYu6///5wm81mT0lJ6X7u3DmvtWvXBkZHR9ttNps9NTU1/NKl\nSwIAU6ZMCe3Ro0eMzWazT5w4MQwAfve733V69tln2y9ZsqRtdna2/9ixY7tHRUXZz58/L0lJSZFb\ntmzxnzNnTsikSZOufgtj7ty57R5++OHOADBv3ryguLi46KioKPvo0aO7OhwOt74HDAwiIouKioqa\nT5o0qbigoCA3MDCw8oUXXmj/2GOPdVu1atW+goKCXIfDgZdeeink+PHj3h999FHbwsLCnIKCgtxZ\ns2YdrbqfRx555HRsbOzFZcuW7c/Ly8sNCAi4WtTwoYceOv3RRx9dva/F6tWrg0aPHn169+7dzVev\nXh2Unp6el5eXl+vl5aXz589v587Xz8AgIrKoQ4cOV3784x9fAICHHnro1ObNmwPDwsLKbr/99jIA\nGDdu3Kkvv/wyMCgoqMLPz69y1KhRXf/+97+3CQgIqLTaRqdOnRydO3cu+/TTT1seO3bMe//+/c3v\nvffe85s2bQrMzs72j4+Pj46KirJ/+eWXrfbv3+/nqtdaE4+cwyAiaopExNJ6zZo1Q2Zm5t5169a1\nevfdd9v+7W9/u23Hjh0FVtsZMWLE6ZUrV7aNioq6PGTIkNNeXl5QVUlNTT31xhtvHK73C7hBHGEQ\nEVl09OhR308++aQlALzzzjtB
AwcOPHv48GHf7OxsPwBYtmxZu+Tk5HOlpaVeJSUl3iNHjiydP3/+\nwb179/6gXl5AQEBFaWmpd03tjBkz5vSmTZvavvfee0GjR48uAYCUlJSzGzZsaHv48GEfADh+/Lh3\nQUGBy26WVBMGBhGRRd27d7+8ePHidjabzX769GmfmTNnnpg/f35RampqD5vNZvfy8sJTTz1VfObM\nGe+UlJQIm81mT05Ojvzzn/98sPq+xo4de/Lxxx/v6jzpXfW5kJCQioiIiEuHDx/2GzRo0EUASEhI\nuDxz5szDd999t81ms9kHDx5sO3jwYLPq+3UlUdXrr+VhiYmJmp6eXuvzB5OTXdLutllHGnyfI5P3\nNfg+G4Ir3kNXvH8A8HXZ5gbf542WBuFn0BLJysoqio+PP+mpDtyI/Px835///OcRVUuO3+yysrKC\n4+Pjw53zHGEQEZElDAwiIguq30DpVsTAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIjfKz8/3nT9/\nflB9tvX39+/d0P2pC5YGIaImZ8Ynhxq0vPmL94S5pbw5ABQWFvqtWrUqaNKkSSXVnysvL0ezZm69\nFq9OOMIgIrIgPz/ft3v37jGjRo3q2rNnz5gBAwZEnD9/XnJycvySk5MjYmJiohMSEiIzMjKaA8Dw\n4cPDlyxZ0ta5vXN0MGPGjND09PSAqKgo+3PPPXfb3Llz2w0ZMqT74MGDeyYnJ9tKS0u9+vXrZ7Pb\n7dE2m82+fPnyNrX1yd04wiAisui7775rvnz58v39+/c/8NOf/rT7smXL2r799tvBCxcuPBAXF1f2\n2WeftZw8eXKXaxUafPHFFw+/8sor7T///PNvAON+F7t37w74+uuvc9q3b19RXl6OjRs3fhMUFFR5\n9OhRn759+0aNHj36jJeX5/++Z2AQEVkUGhpa1r9//0sA0Lt374tFRUV+GRkZAampqT2c61y5csVa\nSdsqkpOTz7Zv374CACorK2Xq1KlhO3bsCPDy8sKJEyd8Dx065NOlSxf33i2pBgwMIiKLfH19rxbf\n8/b21uPHj/sEBgY68vLycquv6+PjoxUVFQCAyspKlJeX1xok/v7+V++XsWDBgqBTp0757NmzZ6+f\nn5+GhobGXbp0yfPDC/AcBhFRvbVq1aoyLCzsyuLFi9sCRjBs3769BQB07dr1yq5du/wBYMWKFW0c\nDocAQOvWrSvOnz9fY1lzACgtLfUODg4u9/Pz0/Xr1wceOXLErSXMr4WBQUR0A1auXLl/yZIlwZGR\nkfaIiIiYNWvWtAGAxx9/vHjbtm2BcXFx0Tt27GjZokWLSgBISkq65OPjo5GRkfbnnnvutur7mzBh\nQklWVlbL2NjY6OXLlwd169btsrtfU214SIqImhx3fg3WqXrxweeff/64czotLa2w+vqdO3d2ZGVl\n5TnnnXfK8/Pz0+3bt1c/KX7KOdGxY0dHZmZmHmpw8eLFjBt4CTeMIwwiIrKEgUFERJYwMIiIyBKP\nBYaIeItIhohs8FQfiMjtKisrK+t8nQK5n/nvVFl1mSdHGL8FsNeD7ROR+2UXFxe3Zmg0bpWVlVJc\nXNwaQHbV5R75lpSIhAH4GYAXAfzOE30gIvdzOBwTjh07tujYsWOx4CHxxqwSQLbD4ZhQdaGnvlb7\n/wA8DSDQQ+0TkQckJCScADDU0/2g+nF7YIjIzwGcUNVdIjLwGutNBDARALp06eKm3hHd/GZ8csgl\n+33xnjCX7JcaD08MCQcAGCoiRQDeBTBYRJZXX0lVF6pqoqomhoSEuLuPRERUjdsDQ1X/qKphqhoO\nYBSAz1R1jLv7QUREdcOTTkREZIlHa0mp6hcAvvBkH4iIyBqOMIiIyBIGBhERWcLAICIiSxgYRERk\nCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkiUeLD96KePMaIm
qq\nOMIgIiJLGBhERGQJA4OIiCxhYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJ\nA4OIiCxhYBARkSVuDwwR6Swin4vIXhHJEZHfursPRERUd56oVusA8HtV3S0igQB2icjHqprrgb4Q\nEZFFbh9hqOpRVd1tTp8DsBdAqLv7QUREdePRcxgiEg6gN4CdNTw3UUTSRSS9uLjY3V0jIqJqPBYY\nIhIAYA2Aqap6tvrzqrpQVRNVNTEkJMT9HSQiou/xSGCISDMYYbFCVd/3RB+IiKhuPPEtKQHwFoC9\nqvo/7m6fiIjqxxMjjAEAHgIwWEQyzcdPPdAPIiKqA7d/rVZVvwQg7m6XiIhuDK/0JiIiSxgYRERk\nCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZEl\nDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYw\nMIiIyBIGBhERWcLAICIiSzwSGCKSIiL5IvKNiDzjiT4QEVHduD0wRMQbwBsAhgCwA3hAROzu7gcR\nEdWNJ0YYSQC+UdX9qnoFwLsAhnmgH0REVAeiqu5tUGQEgBRVnWDOPwSgr6r+ptp6EwFMNGcjAeS7\ntaN1EwzgpKc70cTxPbxxjf09PKmqKZ7uBNWfjwfalBqW/SC1VHUhgIWu786NE5F0VU30dD+aMr6H\nN47vIbmaJw5JHQLQucp8GIAjHugHERHVgScC418AIkSkm4j4AhgFYJ0H+kFERHXg9kNSquoQkd8A\n+D8A3gAWq2qOu/vRwJrEobNGju/hjeN7SC7l9pPeRETUNPFKbyIisoSBQURElty0gSEibURkSj23\nXWpeL9IQ/fhCRPhVx3oSkYEi0t/T/WiKavvsicg4EXndE32ipu2mDQwAbQDUKzCocRARHwADATAw\n6sgswUPUoG7mwJgNoIeIZIrISyIyTUT+JSJfi8hzzpVEZKy5LEtE3q6y/Y9EZJuI7HeONsy/dr8Q\nkdUikiciK0REzOfuFpEMEdkjIotFxK96h0TkAfP5bBGZU2X5r0WkwNz3myLyuogEisi3ItLMXKeV\niBQ55xsjEWkpIhvN9zJbREaafZ4jIl+Zj57mul1F5FPzvf9URLqYy5eKyP+IyOcAVgGYBOBJ898x\nWURSzX1nicgWD75clxGRp0XkCXP6VRH5zJy+W0SWX+NzdF5EnheRnQD6VdvnI+ZnbDOAAe58PXQT\nUdWb8gEgHEC2Of1jGF85FBghuQHAjwDEwCg5EmyuF2T+XArgPXNdO4zaV4Dx124pjIsNvQBsB3An\ngOYADgKwmestAzDVnP4CQCJlq2BiAAAG8ElEQVSATgC+AxAC4+vMnwH4hbm8CEAQgGYA0gC8bm67\nBMAvzOmJAF7x9Pt6nfd8OIA3q8y3Nl/bDHN+LIAN5vR6AA+b0+MBfFjlvd8AwNuc/xOAp6rscw+A\nUHO6jadfs4vex/8A8J45nQbgK/Oz8V/m4wefI3NdBfCrKvtxfvY6VtnGF8BW52eMDz7q8riZRxhV\n/dh8ZADYDSAKQASAwQBWq+pJAFDVkirbfKiqlaqaC6B9leVfqeohVa0EkAkjmCIBfKuqBeY6f4cR\nSFXdAeALVS1WVQeAFeY6SQA2q2qJqpbDCCqnRQAeMacfgREgjdkeAPeYI4pkVS01l6+s8tP5l28/\nAO+Y02/DCF6n91S1opY2tgJYKiKPwriO52a0C0CCiAQCKIPxh0kigGQAZ1Dz5wgAKgCsqWF/fats\ncwXGyI2ozm6VwBAAf1HVXuajp6q+ZS6v7UKUsmrb17S8AsZfeTXVx6qpD3VZDlXdCiBcRO6C8Rd3\ntoV2PMYMzAQYwfEXEXnW+VTV1WrbvMr0hWu0MQnATBjlZT
JFpF39e9w4mX84FMH4I2EbjFHGIAA9\nYIwUanP5GkHLC67oht3MgXEOQKA5/X8AxotIAACISKiI3AbgUwC/cv7SEZGgeraVB+MXe09z/iEA\nm6utsxPAXSISbJ6QfMBc5ytzeVvzJO/watstg/GXeWMfXUBEOgG4qKrLAbwMoI/51MgqP7eb09tg\nlIUBgAcBfFnLbqv+O0JEeqjqTlV9FkZl1s61bNfUbQHwlPkzDca5nEwAO1Dz5+hadgIYKCLtzHNg\nqa7rNt3MPFGt1i1U9ZSIbBWRbAD/gHH4Y7t5jvo8gDGqmiMiLwLYLCIVMA5ZjatHW5dF5BEA75m/\n9P8FYH61dY6KyB8BfA5jVPGRqq4FABGZBeM/9REAuTDOkzitAPBn/PuwTmMWB+AlEakEUA5gMoDV\nAPzME7FeMH7BAcATABaLyDQAxfj3obfq1gNYLSLDADwO4wR4BIz38FMAWa56MR6WBmAGgO2qekFE\nLgNIu9bnqDbmNn+CEdZHYRyWvVkP55ELsTRIIyAiAap63gybD2DU1/rAfG4EgGGq+pBHO1lPIlIE\nINF5noiImq6bdoTRxPxJRO6B8W2rfwL4EABE5DUYt7L9qQf7RkQEgCMMIiKy6GY+6U1ERA2IgUFE\nRJYwMIiIyBIGBjVaIvILEbF7uh9EZGBgUGP2Cxi1vFxGWNWVyDIGxi1IRMJFZK9ZGTdHRP4pIi1E\npIeIbBKRXSKSJiJRIuItRsVeEeMeI5Ui8iNzP2ki0lNE7jKryWaKUbE38BptP21WWs0SkdnmskfF\nqCScJSJrRMRfjHtgDIVxIWCm2bcf9M/cvoeI7DD38byInDeXixiVirPNNkeayweKyOci8g6APSLy\ngoj8tkofXxSzWiwRVeHp6od8uP8Bo2CiA0Avc/5/AYyBceV0hLmsL4DPzOlNMCr7/hzGVewzAPjB\nKLgIGFdjDzCnAwD41NLuEBglQfzNeWd14HZV1vkzgMfN6aUARlR5rrb+bQDwgDk9CcB5c3o4gI9h\nXNXcHkYdpo4wqg5fANCtyvux25z2ArCvap/44IMP48EL925d36pqpjm9C8Yvzf4wyps413He0yMN\nRkXUbgD+AuBRGPWL/mU+vxXA/4jICgDvq+qhWtq8B8ASVb0IfK86cKyI/BnGTa8CYNT++h6zDlht\n/esH4/AVYJSAedmcvhPASjUK8h037wVxB4CzMKoOf2v2o0hETolIbxjBkqGqp2p5DUS3LAbGrat6\n1d32AM6oaq8a1nUWv+sE4FkA02D8lb4FAFR1tohshHFF+g4RuUdV82rYT23VgZfCuKdDloiMM/dd\nndc1+leba1URrl4RdxGMOmIdACyuQxtEtwyewyCnswC+FZFU4Orx/3jzuZ0w/rqvVNXLMKqmPgYj\nSJwVZPeo6hwA6TDuN1KTf8KoGuxvbuesDhwI4KhZSfXBKutfrVSrqtfq3w78u8rvqCrbbwEw0jwP\nEwJjlPRVLX37AEAKjBHID0Y4RMTAoO97EMCvRSQLQA6AYQCgqmUw7ii4w1wvDcYv8j3m/FTzxHIW\ngEswqgP/gKpuArAOQLqIZMIo3w0A/wkjlD6GUSre6V0A08wT6T1q6x+AqQB+JyJfwThH4az2+wGA\nr2FUtP0MwNOqeqyWvl2BUQH2f7X2e0oQ3dJYS4qaPHPEcklVVURGwTgBPux621XbhxeMst+pqlro\nin4SNXU8h0E3gwQAr4txNvwMjHuEW2ZeHLgBwAcMC6LacYRBDU5E4mDcp7uqMlXt64n+EFHDYGAQ\nEZElPOlNRESWMDCIiMgSBgYREVnCwCAiIksYGEREZMn/B5JpsJb/6XLqAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 395.5x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# `factorplot` was renamed to `catplot` in seaborn 0.9 and later removed.\n",
    "# Prefer the current name; fall back to the old one only on legacy installs.\n",
    "_count_plot = getattr(sns, 'catplot', None) or sns.factorplot\n",
    "\n",
    "# Number of articles per sentiment class, grouped by news category.\n",
    "fc = _count_plot(x=\"news_category\", hue=\"sentiment_category\",\n",
    "                 data=df, kind=\"count\",\n",
    "                 palette={\"negative\": \"#FE2020\",\n",
    "                          \"positive\": \"#BADD07\",\n",
    "                          \"neutral\": \"#68BFF5\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Most Negative Tech News Article: The maker of world's cheapest smartphone 'Freedom 251' priced at ₹251, Ringing Bells' founder Mohit Goel was arrested along with two more people by the Delhi Police on Sunday. The three were allegedly trying to extort money in lieu of settling a rape case. Last year, Goel was arrested over allegations of fraud and an alleged non-payment of ₹16 lakh.\n",
      "\n",
      "Most Positive Tech News Article: The American Automobile Association has launched a contest to find the first couple to get married in one of its self-driving shuttles in Las Vegas. The contestants will have to write a 400-word essay describing how an autonomous vehicle would have changed their road trip experience with their partner. The winning couple will be married on June 30.\n"
     ]
    }
   ],
   "source": [
    "# Locate the extremes from the data instead of hard-coding their scores, so the\n",
    "# cell keeps working when the scraped articles (and their scores) change.\n",
    "tech_scores = df.loc[df.news_category == 'technology', 'sentiment_score'].astype('float')\n",
    "pos_idx = tech_scores.idxmax()  # first article with the highest score\n",
    "neg_idx = tech_scores.idxmin()  # first article with the lowest score\n",
    "\n",
    "# Positional lookup, as before: assumes df rows are in the same order as news_df.\n",
    "print('Most Negative Tech News Article:', news_df.iloc[neg_idx]['news_article'])\n",
    "print()\n",
    "print('Most Positive Tech News Article:', news_df.iloc[pos_idx]['news_article'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Most Negative World News Article: Slamming Canadian Prime Minister Justin Trudeau's comments on US tariffs during the G7 summit, US President Donald Trump's trade adviser Peter Navarro said, \"Trudeau deserves a special place in hell.\" Navarro also accused Trudeau of backstabbing Trump. The Canadian PM had called US tariffs \"insulting\", saying the country won't be pushed around and plans to apply retaliatory tariffs.\n",
      "\n",
      "Most Positive World News Article: Pope Francis on Sunday said he is praying that the upcoming summit between US President Donald Trump and North Korean leader Kim Jong-un succeeds in laying the groundwork for peace. Urging people around the world to pray for the summit, the pontiff said, \"I want to offer the beloved people of Korea an especial thought of friendship.\"\n"
     ]
    }
   ],
   "source": [
    "# Locate the extremes from the data instead of hard-coding their scores, so the\n",
    "# cell keeps working when the scraped articles (and their scores) change.\n",
    "world_scores = df.loc[df.news_category == 'world', 'sentiment_score'].astype('float')\n",
    "pos_idx = world_scores.idxmax()  # first article with the highest score\n",
    "neg_idx = world_scores.idxmin()  # first article with the lowest score\n",
    "\n",
    "# Positional lookup, as before: assumes df rows are in the same order as news_df.\n",
    "print('Most Negative World News Article:', news_df.iloc[neg_idx]['news_article'])\n",
    "print()\n",
    "print('Most Positive World News Article:', news_df.iloc[pos_idx]['news_article'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from textblob import TextBlob\n",
    "\n",
    "# TextBlob polarity of each cleaned article, rounded to three decimals.\n",
    "sentiment_scores_tb = [\n",
    "    round(TextBlob(text).sentiment.polarity, 3)\n",
    "    for text in news_df['clean_text']\n",
    "]\n",
    "\n",
    "# Bucket by sign: below zero -> negative, above zero -> positive, otherwise neutral.\n",
    "sentiment_category_tb = [\n",
    "    'negative' if polarity < 0 else ('positive' if polarity > 0 else 'neutral')\n",
    "    for polarity in sentiment_scores_tb\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">sentiment_score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>news_category</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>sports</th>\n",
       "      <td>25.0</td>\n",
       "      <td>0.084040</td>\n",
       "      <td>0.149114</td>\n",
       "      <td>-0.200</td>\n",
       "      <td>-0.01700</td>\n",
       "      <td>0.075</td>\n",
       "      <td>0.15900</td>\n",
       "      <td>0.381</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>technology</th>\n",
       "      <td>24.0</td>\n",
       "      <td>0.010458</td>\n",
       "      <td>0.203315</td>\n",
       "      <td>-0.500</td>\n",
       "      <td>-0.07525</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.05925</td>\n",
       "      <td>0.500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>world</th>\n",
       "      <td>25.0</td>\n",
       "      <td>0.120760</td>\n",
       "      <td>0.221134</td>\n",
       "      <td>-0.296</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.075</td>\n",
       "      <td>0.21100</td>\n",
       "      <td>0.700</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              sentiment_score                                             \\\n",
       "                        count      mean       std    min      25%    50%   \n",
       "news_category                                                              \n",
       "sports                   25.0  0.084040  0.149114 -0.200 -0.01700  0.075   \n",
       "technology               24.0  0.010458  0.203315 -0.500 -0.07525  0.000   \n",
       "world                    25.0  0.120760  0.221134 -0.296  0.00000  0.075   \n",
       "\n",
       "                               \n",
       "                   75%    max  \n",
       "news_category                  \n",
       "sports         0.15900  0.381  \n",
       "technology     0.05925  0.500  \n",
       "world          0.21100  0.700  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assemble category, TextBlob score and label into one frame (one row per article).\n",
    "df = pd.DataFrame({'news_category': list(news_df['news_category']),\n",
    "                   'sentiment_score': sentiment_scores_tb,\n",
    "                   'sentiment_category': sentiment_category_tb},\n",
    "                  columns=['news_category', 'sentiment_score', 'sentiment_category'])\n",
    "df['sentiment_score'] = df['sentiment_score'].astype('float')\n",
    "\n",
    "# Per-category summary statistics of the scores.\n",
    "df.groupby(by=['news_category']).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>news_category</th>\n",
       "      <th>sentiment_score</th>\n",
       "      <th>sentiment_category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>technology</td>\n",
       "      <td>-0.058</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>technology</td>\n",
       "      <td>0.119</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>technology</td>\n",
       "      <td>-0.022</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>technology</td>\n",
       "      <td>0.363</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>technology</td>\n",
       "      <td>0.078</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  news_category  sentiment_score sentiment_category\n",
       "0    technology           -0.058           negative\n",
       "1    technology            0.119           positive\n",
       "2    technology           -0.022           negative\n",
       "3    technology            0.363           positive\n",
       "4    technology            0.078           positive"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the sentiment frame.\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEYCAYAAABPzsEfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XlcVXX+P/DXGxAQwQUhFxBXLqu5\nQPhVY1KrGZwp7Zvx1cxsGcft0WIz2TjZr5lqcrRs+n1bTK3USMecbHFrnKmclNwaFBkBWVJxV1AU\nxQW58P7+cc51iAAPyL0X9PV8PO6Dc88953w+94q87ucs7yOqCiIioqvxcHcHiIioeWBgEBGRJQwM\nIiKyhIFBRESWMDCIiMgSBgYREVnCwCAiIksYGEREZAkDg4iILPFydwesSEpK0vXr17u7G0R0bcTd\nHaBr0yxGGCdPnnR3F4iIbnjNIjCIiMj9GBhERGQJA4OIiCxhYBARkSUMDCIisoSBQUREljgtMERk\nkYgUikhmtfmPi0iuiGSJyCvOap+IiBqXM0cYSwAkVZ0hIkMBjARws6rGAJjrxPaJiKgROS0wVHUT\ngOJqs6cAmK2qZeYyhc5qn4iIGperS4PYACSKyMsALgF4WlX/VdOCIjIRwEQACAsLc10PiZqQFak9\nG32boxP3Nvo26cbg6oPeXgDaAfgvANMB/FVEaqwvo6oLVTVeVeODg4Nd2UciIqqBqwPjMIBP1fAd\ngEoAQS7uAxERNYCrA+NzAMMAQERsALwBsLIgEVEz4LRjGCKyHMAQAEEichjA7wEsArDIPNX2MoCH\nVFWd1QciImo8TgsMVb2/lpfGOatNIiJyHl7pTUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OI\niCxhYBARkSUMDCIissTV1WrpBuKMSqsAq60SuQtHGEREZAkDg4iILGFgEBGRJQwMIiKyhIFBRESW\nMDCIiMgSBgYREVnCwCAiIksYGEREZInTAkNEFolIoXn/7uqvPS0iKiJBzmqfiIgalzNHGEsAJFWf\nKSJdANwJ4KAT2yYiokbmtMBQ1U0Aimt46XUAzwBQZ7VNRESNz6XHMERkBIAjqpphYdmJIpImImlF\nRUUu6B0REdXFZYEhIn4AZgJ43sryqrpQVeNVNT44ONi5nSMioqty5QijJ4DuADJEpABAKICdItLR\nhX0gIqIGctn9MFR1N4CbHM/N0IhX1ZOu6gMRETWcM0+rXQ5gK4AIETksIr90VltEROR8ThthqOr9\nV3m9m7PaJiKixscrvYmIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYw\nMIiIyBIGBhERWcLAICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLA\nICIiS5wWGCKySEQKRSSzyrxXRSRHRP4tIp+JSFtntU9ERI3LmSOMJQCSqs37EkCsqt4MIA/A75zY\nPhERNSKnBYaqbgJQXG3eP1TVbj7dBiDUWe0TEVHjcucxjEcB/K22F0VkooikiUhaUVGRC7tFREQ1\ncUtgiMhMAHYAy2pbRlUXqmq8qsYHBwe7rnNERFQjL1c3KCIPAbgLwO2qqq5un4iIGsalgSEiSQB+\nC+A2Vb3gyraJiOjaOPO02uUAtgKIEJHDIvJLAG8BCADwpYjsEpH5zmqfiIgal9NGGKp6fw2z33dW\ne0RE5Fy80puIiCxhYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxh\nYBARkSUMDCIisoSBQUREljAwiIjIEgYGERFZwsAgIiJLGBhERGQJA4OIiCxhYBARkSXOvKf3IhEp\nFJHMKvMCReRLEck3f7ZzVvtERNS4nD
nCWAIgqdq8GQC+VtVwAF+bz4mIqBlwWmCo6iYAxdVmjwTw\ngTn9AYB7nNU+ERE1Llcfw+igqscAwPx5k4vbJyKiBvJydwdqIyITAUwEgLCwsDqXPZSY6JQ+dElN\ndcp2iYiaI1ePME6ISCcAMH8W1ragqi5U1XhVjQ8ODnZZB4mIqGauDozVAB4ypx8CsMrF7RMRUQM5\n87Ta5QC2AogQkcMi8ksAswHcKSL5AO40nxMRUTPgtGMYqnp/LS/d7qw2iahp27Fjx01eXl7vAYgF\nLxxuyioBZNrt9glxcXFXDh002YPeRHT98fLyeq9jx45RwcHBpz08PNTd/aGaVVZWSlFRUfTx48ff\nAzDCMZ8JT0SuFBscHHyWYdG0eXh4aHBwcAmMkeB/5rupP0R0Y/JgWDQP5r/TDzKCgUFERJYwMIio\n2duyZUvLFStWtHE8X7ZsWZtnn322ozPbXLt2bcCXX37ZqjG3OWPGDKf2+VoxMIio2UtLS/Nbt27d\nlcB44IEHSmbNmnXcmW1u2LAhIDU11b8xt/nGG290aszt1aS8vLzB61oKDBH52so8IqL6Onv2rMeQ\nIUN6RURERIeHh8e8++677VJTU/1uueWWiJiYmKhbb701/MCBAy0AICEhIWLKlCkhvXv3jurWrVvs\n+vXr/S9duiR/+tOfOq9Zs6ZdZGRk9LvvvtvujTfeaD9+/PgwABg1alS3Bx54IGzAgAG20NDQ3uvW\nrfNPTk7u1qNHj5hRo0Z1c/Tj008/bd23b9/I6OjoqOHDh/coKSnxAICQkJDeTz31VOfo6Ogom80W\nnZ6e7pubm+udkpISPH/+/A6RkZHR69evrzE4Dh065HXnnXf2jIiIiI6IiIh2jEjuuOOOnjExMVG9\nevWKmTt3bhAATJ06NaSsrMwjMjIyesSIEd0BYN68eYG9e/eOioyMjB47dmxXu90OAHj99deDunXr\nFpuQkBAxZsyYro73mpeX5z1w4ECbzWaLHjhwoC0/P9/b8RlMmDAhdMCAAbbJkyd36dq1a+zRo0e9\nAKCiogJhYWGxx44du+pZs3UGhoj4ikgggCARaWfezyJQRLoB6Hy1jRMRXc2nn37aumPHjuW5ubnZ\n+fn5Wffee+/ZJ554ImzVqlV7s7Ky9jz00EMnn3766RDH8na7XXbv3r1nzpw5h1588cXOvr6++rvf\n/e7o3XfffTonJyf7V7/61enqbZSUlHht3bo1b/bs2YdGjx4dPn369BP5+flZOTk5Lbds2dLy2LFj\nXrNmzeq0adOmvOzs7D39+/e/8NJLL3VwrB8UFGTPzs7e8+ijjxbNnj27Q0RExOXx48cXTZ48+URO\nTk52UlJSaU3vbfLkyWGJiYnncnNzs7OysrL79+9/CQCWLVtWkJWVtWfXrl3ZCxYs6HD8+HHPefPm\nHfHx8anMycnJXr169f6dO3f6rly5MjAtLS0nJycn28PDQ+fPn9++oKCgxdy5cztt3759T2pqal5+\nfr5v1fbGjh17Ki8vL3v06NGnpkyZ0sXx2t69e303b96c9/777x+67777Tr333nuBALBq1arWUVFR\nFzt16mS/2r/V1RJlEoBpMMJhBwAx558F8PbVNk5EdDX9+/e/OHPmzC5TpkwJGTlyZEn79u3t+fn5\nLYcNG2YDgMrKSgQHB1/Zj5KcnHwaAAYNGnR++vTp3lba+MUvfnHGw8MD/fv3v9C+ffvyhISEiwBg\ns9ku7t271+fAgQPee/fu9U1ISIgEgPLycomLi7sSAmPHjj0NAAkJCRdWr15t+cZvW7ZsCVi5cuV+\nAPDy8kL79u0rAGDOnDkd1q1b1xYAjh8/3iIrK8u3Y8eO56uuu379+oDMzEy/Pn36RAHApUuXPG66\n6SZ7ampqqwEDBpzr0KFDBQD893//9+m8vDxfAEhPT2/1t7/9bS8ATJkypfiFF14IdWzv3nvvPe3l\nZf
zJnzJlyskRI0b0ev755wsXLVoU9PDDD5+08n7qDAxV/V8A/ysij6vqm1Y2SERUHzfffHPZzp07\nsz/55JM2M2fODBkyZMjZXr16Xdy1a1dOTcv7+voqYPwBrqiokJqWqW0dT09PeHt7Xzmt18PDA3a7\nXTw9PfXWW289u2bNmv1XaVPtdrulNmuzdu3agI0bNwakpaXlBAQEVCYkJERcvHjxR3t7VFWSk5NP\nvf3220eqzk9JSWnbkHb9/f0rHdO9evUqDwoKsq9evTogPT291eeff77PyjYsHcNQ1TdFZJCIjBWR\n8Y5HQzpNRFRVQUFBi4CAgMqpU6cWT5s27URaWlqr4uJir6+++qoVAJSVlUlaWppvXdto3bp1RWlp\naYNP4hkyZMj5tLQ0/8zMTB8AOHfunMe///1vn7rWCQgIqDh37pxnXcsMHjz43KuvvhoMAHa7HcXF\nxR5nzpzxbNOmTUVAQEBlenq6b0ZGxpUzrby8vLSsrEwAICkp6ezatWvbHTlyxAsATpw44ZmXl+ed\nmJh4fvv27QFFRUWe5eXlWLVq1ZURT79+/c6/99577QBgwYIFgfHx8TXuKgOARx99tGjChAndR4wY\nUewYeVyN1YPeHwKYC+BWALeYj3hLLRAR1WHHjh0t+/btGxUZGRk9Z86cTi+99NLRjz76aO+MGTNC\nIyIiomNiYqI3btxY59lIw4cPP5eXl9fScdC7vn3o3LmzfcGCBQVjxozpYbPZouPi4iJ3795dZ0iN\nGjXqzLp169rWddD7nXfeObhx48YAm80WHRsbG71z586Wo0aNKrHb7WKz2aKfffbZzn369LmyK+qB\nBx4oioqKih4xYkT3uLi4S88999yR22+/3Waz2aKHDRtmO3ToUIvu3buXP/XUU8duueWWqMGDB0fY\nbLaLbdq0qXC09+GHHwbZbLbo5cuXt583b96h2vp///33l1y4cMFz4sSJp6x+TqJ69YsuRWQPgGi1\nsrATxMfHa1paWq2v8wZKTdOK1J5O2e7oxL1O2W5T5IzP0I2fn2RkZBT06dPH0v5yql1JSYlHmzZt\nKsvLy/Gzn/2s18MPP3xy/PjxZ+qzjU2bNvk99dRTXXbs2JFb2zIZGRlBffr06eZ4brX4YCaAjgCO\n1adDRETU+KZPn95506ZNrcvKyuS22247O27cuHqFxbPPPttxyZIlwYsXL67xmE1trAZGEIBsEfkO\nQJljpqqOqH0VIqIbw29/+9uOq1atCqw6b+TIkcVz5sxxysWDCxcuPHwt68+aNet4Qy5stBoYf6jv\nhomIbhRz5sw57qxwaEosBYaqbnR2R4iIqGmzFBgicg6A44C3N4AWAM6ramtndYyIiJoWqyOMgKrP\nReQeAAlO6RERETVJDbrQRVU/BzCsoY2KyFMikiUimSKyXETqPN+ZiOh6d/LkSc/Zs2cHO54XFBS0\nSEpK6uHOPlVndZfUvVWeesC4aK9B12SISAiAJ2Bc13FRRP4KYAyAJQ3ZHhHdeA4lJsY15va6pKbu\naMztNcSpU6c833///ZtmzJhRBADdunUrX79+vaWSHa5idYRxd5XHzwCcAzDyGtr1AtBSRLwA+AE4\neg3bIiJyutzcXO8ePXrEjBkzpmuvXr1iBg8eHF5aWipZWVk+iYmJ4TExMVFxcXER6enpvgCQlZXl\n06dPn8jY2NioadOmdfbz8+sHGBfdDRw40OYol7506dK2APCb3/wm9NChQz6RkZHRkyZNCs3NzfUO\nDw+PAYCbb745smp5lISEhIjU1FS/s2fPeiQnJ3eLjY2NioqKurItZ7F6DOORxmpQVY+IyFwABwFc\nBPAPVf1H9eVEZCKAiQAQFhbWWM3Xy3V2lS0RXaODBw/6Ll26dN+gQYMO/PznP++RkpLS7sMPPwxa\nuHDhgd69e5dt2LCh1ZQpU8K2bduW99hjj3WZOnVq4aRJk4pfeeWV
K7ua/Pz8KtetW/d9YGBg5bFj\nx7wGDBgQOXbs2DOvvfba4bvuuqtlTk5ONmAElGOdUaNGFS9btiwwPj7+6IEDB1oUFha2SExMvPDY\nY4+FDB069OzHH39ccPLkSc/4+PioESNGnG3dunVlTf2/VlZrSYWKyGciUigiJ0TkExEJvfqaNW6r\nHYzRSXcYZdNbici46sup6kJVjVfV+ODg4OovExG5XEhISNmgQYMuAkC/fv0uFBQU+KSnp/snJyf3\njIyMjJ46dWrXwsLCFgCQnp7u/+ijjxYDwIQJE67Ua6qsrJRp06aF2my26KFDh9oKCwu9Dx8+XOeX\n9/Hjx592lFVPSUlpd/fdd58GgG+++ab166+/3ikyMjL61ltvjSgrK5Pvv//eUsn3hrB64d5iAH8B\nkGw+H2fOu7MBbd4BYL+qFgGAiHwKYBCApQ3YFhGRy1Qtje7p6aknTpzwCggIsDtGBVYsWLAg8NSp\nU167d+/e4+PjoyEhIb1rKm9eVffu3cvbtm1r3759e8tPP/00cMGCBQcAQFWxcuXK7/v06VNW1/qN\nxeoxjGBVXayqdvOxBEBDv/YfBPBfIuInIgLgdgB7GrgtIiK3ad26dWVoaOjlRYsWtQOMmz1t3bq1\nJQD07du3dMmSJe0AYNGiRVfKhpSUlHgGBQWV+/j46Jo1awKOHj3qDQBt2rSpOH/+fK1/k++7777i\nWbNmdTx37pyn4wZQQ4cOPfvaa691qKw09kBt3ry5pdPeLKwHxkkRGScinuZjHADLJXGrUtXtAFYC\n2Algt9mHhQ3ZFhGRuy1fvnzf4sWLgxz3JP/kk0/aAsCbb7556M033+zQu3fvqGPHjrXw9/evAIAJ\nEyYUZ2RktIqNjY1aunRpYPfu3S8BQMeOHSvi4uJKw8PDYyZNmvSjXf7jxo07vW7dusCRI0cWO+bN\nnj37qN1ul8jIyOjw8PCY5557LqT6eo3JannzMABvARgI43TaLQCeUNWDzuycg7vKm2+Z1fgnb91I\nB71Z3vzaXWcnXtxQ5c3PnTvn0apVq0oPDw8sXLiw3YoVKwK//vrrZvXL29Dy5i8BeEhVTwOAiATC\nuKHSo43eQyKi68DmzZv9nnzyyTBVRevWrSuWLFlS4O4+XSurgXGzIywAQFWLRaSfk/pERNTsJSUl\nlebm5lo+GN4cWD2G4WGeDgvgygjDatgQEdF1wOof/dcAbBGRlTCOYfwPgJed1isiImpyrF7pnSIi\naTAKDgqAe1X1uhpqERFR3SzvVjIDgiFBRHSDalB5cyIiqr9XXnkl+K233moPAG+88Ub7goKCFo7X\nRo8e3XXHjh1N+lYPPHBNRM3OitSejVrefHTiXpeUN3/mmWeKHNNLly4N6tu378Vu3bqVA8CKFSsO\nuKIP14KBQQCcdPHjrMbfJJG75ObmeiclJYX369fvfGZmpl+PHj0uffzxxwUbNmxoNWPGjC4VFRXo\n06fPhZSUlAMtW7bUqVOnhvz9739v6+npqUOGDDm7cOHCw7/+9a87+/v7V3Tv3v1yZmam3/jx43v4\n+vpWpqWl7Rk2bJht7ty5h7Zu3dpq//79PvPnzz8MGCORHTt2+H3wwQeH5s2bF/jOO+90KC8vl/79\n+59PSUk54OXluj/j3CVFRGRRQUGB7+TJk4vy8vKyAwICKl966aUOkyZN6r5ixYq9eXl52Xa7Ha++\n+mrwiRMnPL/44ot2+fn5WXl5edmzZs06VnU7jzzyyOnY2NgLKSkp+3JycrL9/f2vlNx48MEHT3/x\nxRdX7muxcuXKwLFjx57euXOn78qVKwPT0tJycnJysj08PHT+/PntXfn+GRhERBZ17Njx8k9/+tPz\nAPDggw+e2rhxY0BoaGjZzTffXAYADz/88Klvv/02IDAwsMLHx6dyzJgxXT/44IO2/v7+lu9P0blz\nZ3uXLl3Kvv7661bHjx/33Ldv
n++dd95Zun79+oDMzEy/Pn36REVGRkZ/++23rfft2+fjrPdaE+6S\nIiKyyCiwfXUtWrTArl279qxevbr1Rx991O6dd965adu2bXlW27nvvvtOL1++vF1kZOSl4cOHn/bw\n8ICqSnJy8qm33377SIPfwDXiCIOIyKJjx455f/XVV60A4C9/+UvgkCFDzh45csQ7MzPTBwBSUlLa\nJyYmnispKfEoLi72HD16dMn8+fMP7dmzx6/6tvz9/StKSko8a2pn3Lhxp9evX9/u448/Dhw7dmwx\nACQlJZ1du3ZtuyNHjngBwIkTJzzz8vKcdrOkmjAwiIgs6tGjx6VFixa1t9ls0adPn/Z67rnnCufP\nn1+QnJzc02azRXt4eODpp58uOnPmjGdSUlK4zWaLTkxMjPjjH/94qPq2xo8ff/Lxxx/vGhkZGV1a\nWvqDoUtwcHBFeHj4xSNHjvgMHTr0AgDExcVdeu65547cfvvtNpvNFj1s2DDboUOHWlTfrjNZKm/u\nbixv7nzO+Ayd8fkBTfczdAaWN286cnNzve+6667w/Pz8LHf3xVWqlzfnCIOIiCxhYBARWRAREXH5\nRhpd1ISBQUREljAwiIjIErcEhoi0FZGVIpIjIntEZKA7+kFERNa568K9/wWwXlXvExFvAD86R5mI\niJoWl48wRKQ1gJ8AeB8AVPWyqp5xdT+IiNwhNzfXe/78+YENWdfPz69fY/enPtwxwugBoAjAYhHp\nA2AHgCdV9XzVhURkIoCJABAWFubyTjrLzK8OO2W7L98R6pTtkjXOuhaIFX9rNvOrw41a3vzlO0Jd\nUt4cAPLz831WrFgROHny5OLqr5WXl6NFC5dei1cv7jiG4QWgP4B3VLUfgPMAZlRfSFUXqmq8qsYH\nBwe7uo9ERD+Qm5vr3aNHj5gxY8Z07dWrV8zgwYPDS0tLJSsryycxMTE8JiYmKi4uLiI9Pd0XAEaN\nGtVt8eLF7RzrO0YHM2fODElLS/OPjIyMfuGFF25644032g8fPrzHsGHDeiUmJtpKSko8Bg4caIuO\njo6y2WzRS5cubVtbn1zNHYFxGMBhVd1uPl8JI0CIiJq0gwcP+j7xxBOF33//fVabNm0qUlJS2k2Y\nMKHrvHnzDmZlZe159dVXD0+ZMqXOXSIvv/zykfj4+NKcnJzs3//+94UAsHPnTv/ly5fv37ZtW56f\nn1/lunXrvs/Ozt6zcePGvGeffTa0stJysVuncvkuKVU9LiKHRCRCVXMB3A7eK5yImoGQkJCyQYMG\nXQSAfv36XSgoKPBJT0/3T05OvlLD5fLly9ZK2laRmJh4tkOHDhUAUFlZKdOmTQvdtm2bv4eHBwoL\nC70PHz7sFRYWZm+8d9Iw7jpL6nEAy8wzpPYBeMRN/SAisszb2/tK8T1PT089ceKEV0BAgD0nJ+dH\nX3q9vLy0oqICAFBZWYny8vJag8TPz+/KEGLBggWBp06d8tq9e/ceHx8fDQkJ6X3x4sUmcc2cWzqh\nqrvM4xM3q+o9qnraHf0gIroWrVu3rgwNDb28aNGidoARDFu3bm0JAF27dr28Y8cOPwBYtmxZW7vd\nLgDQpk2bitLS0hrLmgNASUmJZ1BQULmPj4+uWbMm4OjRoy4tYV6XJpFaRETN1fLly/ctXrw4KCIi\nIjo8PDzmk08+aQsAjz/+eNGWLVsCevfuHbVt27ZWLVu2rASAhISEi15eXhoRERH9wgsv3FR9exMm\nTCjOyMhoFRsbG7V06dLA7t27X3L1e6oN77hHRM2OK0+DdahefPDFF1884ZhOTU3Nr758ly5d7BkZ\nGTmO54475fn4+OjWrVur333vlGOiU6dO9l27duWgBhcuXEi/hrdwzTjCICIiSxgYRERkCQODiIgs\nYWAQkStVVlZW1vs6BXI989/pB1cMMjCIyJUyi4qK2jA0mrbKykopKipqAyCz6nyeJUVELmO32y
cc\nP378vePHj8eCX1ibskoAmXa7fULVmQwMInKZuLi4QgAj3N0PahgmPBERWcLAICIiSxgYRERkCQOD\niIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWeK2wBARTxFJF5G17uoDERFZ584RxpMA9rix\nfSIiqge3BIaIhAL4BYD33NE+ERHVn7tGGP8fwDOoVmu9KhGZKCJpIpJWVFTkup4REVGNXB4YInIX\ngEJVrfMm7qq6UFXjVTU+ODjYRb0jIqLauGOEMRjACBEpAPARgGEistQN/SAionpweWCo6u9UNVRV\nuwEYA2CDqo5zdT+IiKh+eB0GERFZ4tY77qnqNwC+cWcfiIjIGo4wiIjIEgYGERFZwsAgIiJLGBhE\nRGQJA4OIiCxhYBARkSUMDCIisoSBQURElrj1wj2ihpj51eFG3+bLd4Q2+jabKmd8fsCN9RneqDjC\nICIiSxgYRERkCQODiIgsYWAQEZElDAwiIrKEgUFERJYwMIiIyBIGBhERWcLAICIiSxgYRERkicsD\nQ0S6iMg/RWSPiGSJyJOu7gMREdWfO2pJ2QH8RlV3ikgAgB0i8qWqZruhL0REZJHLRxiqekxVd5rT\n5wDsARDi6n4QEVH9uPUYhoh0A9APwPYaXpsoImkiklZUVOTqrhERUTVuCwwR8QfwCYBpqnq2+uuq\nulBV41U1Pjg42PUdJCKiH3BLYIhICxhhsUxVP3VHH4iIqH7ccZaUAHgfwB5V/bOr2yciooZxxwhj\nMIAHAQwTkV3m4+du6AcREdWDy0+rVdVvAYir2yUiomvDK72JiMgSBgYREVnCwCAiIksYGEREZAkD\ng4iILGFgEBGRJQwMIiKyhIFBRESWMDCIiMgSBgYREVnCwCAiIksYGEREZAkDg4iILGFgEBGRJQwM\nIiKyhIFBRESWMDCIiMgSBgYREVnilsAQkSQRyRWR70Vkhjv6QERE9ePywBARTwBvAxgOIBrA/SIS\n7ep+EBFR/bhjhJEA4HtV3aeqlwF8BGCkG/pBRET1IKrq2gZF7gOQpKoTzOcPAhigqo9VW24igInm\n0wgAuS7taP0EATjp7k40c/wMr11T/wxPqmqSuztBDeflhjalhnk/Si1VXQhgofO7c+1EJE1V493d\nj+aMn+G142dIzuaOXVKHAXSp8jwUwFE39IOIiOrBHYHxLwDhItJdRLwBjAGw2g39ICKienD5LilV\ntYvIYwD+DsATwCJVzXJ1PxpZs9h11sTxM7x2/AzJqVx+0JuIiJonXulNRESWMDCIiMiS6zYwRKSt\niExt4LpLzOtFGqMf34gIT3VsIBEZIiKD3N2P5qi23z0ReVhE3nJHn6h5u24DA0BbAA0KDGoaRMQL\nwBAADIx6MkvwEDWq6zkwZgOEIBBNAAAIEUlEQVToKSK7RORVEZkuIv8SkX+LyAuOhURkvDkvQ0Q+\nrLL+T0Rki4jsc4w2zG+734jIShHJEZFlIiLma7eLSLqI7BaRRSLiU71DInK/+XqmiMypMv+XIpJn\nbvtdEXlLRAJEZL+ItDCXaS0iBY7nTZGItBKRdeZnmSkio80+zxGR78xHL3PZriLytfnZfy0iYeb8\nJSLyZxH5J4AVACYDeMr8d0wUkWRz2xkissmNb9dpROQZEXnCnH5dRDaY07eLyNI6fo9KReRFEdkO\nYGC1bT5i/o5tBDDYle+HriOqel0+AHQDkGlO/xTGKYcCIyTXAvgJgBgYJUeCzOUCzZ9LAHxsLhsN\no/YVYHzbLYFxsaEHgK0AbgXgC+AQAJu5XAqAaeb0NwDiAXQGcBBAMIzTmTcAuMecXwAgEEALAKkA\n3jLXXQzgHnN6IoDX3P25XuUzHwXg3SrP25jvbab5fDyAteb0GgAPmdOPAvi8yme/FoCn+fwPAJ6u\nss3dAELM6bbufs9O+hz/C8DH5nQqgO/M343fm48f/R6Zyy
qA/6myHcfvXqcq63gD2Oz4HeODj/o8\nrucRRlU/NR/pAHYCiAQQDmAYgJWqehIAVLW4yjqfq2qlqmYD6FBl/neqelhVKwHsghFMEQD2q2qe\nucwHMAKpqlsAfKOqRapqB7DMXCYBwEZVLVbVchhB5fAegEfM6UdgBEhTthvAHeaIIlFVS8z5y6v8\ndHzzHQjgL+b0hzCC1+FjVa2opY3NAJaIyK9gXMdzPdoBIE5EAgCUwfhiEg8gEcAZ1Px7BAAVAD6p\nYXsDqqxzGcbIjajebpTAEAB/UtW+5qOXqr5vzq/tQpSyauvXNL8Cxre8mupj1dSH+syHqm4G0E1E\nboPxjTvTQjtuYwZmHIzg+JOIPO94qepita1eZfp8HW1MBvAcjPIyu0SkfcN73DSZXxwKYHxJ2AJj\nlDEUQE8YI4XaXKojaHnBFV2z6zkwzgEIMKf/DuBREfEHABEJEZGbAHwN4H8cf3REJLCBbeXA+MPe\ny3z+IICN1ZbZDuA2EQkyD0jeby7znTm/nXmQd1S19VJgfDNv6qMLiEhnABdUdSmAuQD6my+NrvJz\nqzm9BUZZGAB4AMC3tWy26r8jRKSnqm5X1edhVGbtUst6zd0mAE+bP1NhHMvZBWAbav49qst2AENE\npL15DCzZed2m65k7qtW6hKqeEpHNIpIJ4G8wdn9sNY9RlwIYp6pZIvIygI0iUgFjl9XDDWjrkog8\nAuBj84/+vwDMr7bMMRH5HYB/whhVfKGqqwBARGbB+E99FEA2jOMkDssA/BH/2a3TlPUG8KqIVAIo\nBzAFwEoAPuaBWA8Yf+AA4AkAi0RkOoAi/GfXW3VrAKwUkZEAHodxADwcxmf4NYAMZ70ZN0sFMBPA\nVlU9LyKXAKTW9XtUG3OdP8AI62Mwdster7vzyIlYGqQJEBF/VS01w+YzGPW1PjNfuw/ASFV90K2d\nbCARKQAQ7zhORETN13U7wmhm/iAid8A42+ofAD4HABF5E8atbH/uxr4REQHgCIOIiCy6ng96ExFR\nI2JgEBGRJQwMIiKyhIFBTZaI3CMi0e7uBxEZGBjUlN0Do5aX0wiruhJZxsC4AYlINxHZY1bGzRKR\nf4hISxHpKSLrRWSHiKSKSKSIeIpRsVfEuMdIpYj8xNxOqoj0EpHbzGqyu8So2BtQR9vPmJVWM0Rk\ntjnvV2JUEs4QkU9ExE+Me2CMgHEh4C6zbz/qn7l+TxHZZm7jRREpNeeLGJWKM802R5vzh4jIP0Xk\nLwB2i8hLIvJklT6+LGa1WCKqwt3VD/lw/QNGwUQ7gL7m878CGAfjyulwc94AABvM6fUwKvveBeMq\n9pkAfGAUXASMq7EHm9P+ALxqaXc4jJIgfuZzR3Xg9lWW+SOAx83pJQDuq/Jabf1bC+B+c3oygFJz\nehSAL2Fc1dwBRh2mTjCqDp8H0L3K57HTnPYAsLdqn/jggw/jwQv3blz7VXWXOb0Dxh/NQTDKmziW\ncdzTIxVGRdTuAP4E4Fcw6hf9y3x9M4A/i8gyAJ+q6uFa2rwDwGJVvQD8oDpwrIj8EcZNr/xh1P76\nAbMOWG39Gwhj9xVglICZa07fCmC5GgX5Tpj3grgFwFkYVYf3m/0oEJFTItIPRrCkq+qpWt4D0Q2L\ngXHjql51twOAM6rat4ZlHcXvOgN4HsB0GN/SNwGAqs4WkXUwrkjfJiJ3qGpODduprTrwEhj3dMgQ\nkYfNbVfnUUf/alNXFeHqFXHfg1FHrCOARfVog+iGwWMY5HAWwH4RSQau7P/vY762Hca3+0pVvQSj\nauokGEHiqCC7W1XnAEiDcb+RmvwDRtVgP3M9R3XgAADHzEqqD1RZ/kqlWlWtq3/b8J8qv2OqrL8J\nwGjzOEwwjFHSd7X07TMASTBGID8a4RARA4N+6AEAvxSRDABZAEYCgKqWwbij4DZzuVQYf8h3m8+n\nmQeWMwBchFEd+EdUdT
2A1QDSRGQXjPLdAPD/YITSlzBKxTt8BGC6eSC9Z239AzANwK9F5DsYxygc\n1X4/A/BvGBVtNwB4RlWP19K3yzAqwP5Va7+nBNENjbWkqNkzRywXVVVFZAyMA+Ajr7ZetW14wCj7\nnayq+c7oJ1Fzx2MYdD2IA/CWGEfDz8C4R7hl5sWBawF8xrAgqh1HGNToRKQ3jPt0V1WmqgPc0R8i\nahwMDCIisoQHvYmIyBIGBhERWcLAICIiSxgYRERkCQODiIgs+T/LvTrWwTZq/wAAAABJRU5ErkJg\ngg==\n",
      "text/plain": [
       "<Figure size 395.5x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "fc = sns.factorplot(x=\"news_category\", hue=\"sentiment_category\", \n",
    "                    data=df, kind=\"count\", \n",
    "                    palette={\"negative\": \"#FE2020\", \n",
    "                             \"positive\": \"#BADD07\", \n",
    "                             \"neutral\": \"#68BFF5\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Most Negative World News Article: A Czech woman drowned after being trapped inside Prague's underground drainage system while participating in a global GPS-based treasure hunt, police officials said. The woman was geocaching when heavy downpours led to rapidly rising water. The body of the 27-year-old victim, who has not been identified, was found in the Vltava river.   \n",
      "\n",
      "Most Positive World News Article: Pope Francis on Sunday said he is praying that the upcoming summit between US President Donald Trump and North Korean leader Kim Jong-un succeeds in laying the groundwork for peace. Urging people around the world to pray for the summit, the pontiff said, \"I want to offer the beloved people of Korea an especial thought of friendship.\"\n"
     ]
    }
   ],
   "source": [
    "pos_idx = df[(df.news_category=='world') & (df.sentiment_score == 0.7)].index[0]\n",
    "neg_idx = df[(df.news_category=='world') & (df.sentiment_score == -0.296)].index[0]\n",
    "\n",
    "print('Most Negative World News Article:', news_df.iloc[neg_idx][['news_article']][0])\n",
    "print()\n",
    "print('Most Positive World News Article:', news_df.iloc[pos_idx][['news_article']][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">Predicted:</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>negative</th>\n",
       "      <th>neutral</th>\n",
       "      <th>positive</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Actual:</th>\n",
       "      <th>negative</th>\n",
       "      <td>16</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>neutral</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>positive</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Predicted:                 \n",
       "                   negative neutral positive\n",
       "Actual: negative         16       5        6\n",
       "        neutral           3       2        8\n",
       "        positive          4       5       25"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import model_evaluation_utils as meu\n",
    "meu.display_confusion_matrix_pretty(true_labels=sentiment_category, \n",
    "                                    predicted_labels=sentiment_category_tb, \n",
    "                                    classes=['negative', 'neutral', 'positive'])"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
